r128.h 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
0552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161
  1. /*
  2. r128.h: 128-bit (64.64) signed fixed-point arithmetic. Version 1.6.0
  3. COMPILATION
  4. -----------
  5. Drop this header file somewhere in your project and include it wherever it is
  6. needed. There is no separate .c file for this library. To get the code, in ONE
  7. file in your project, put:
  8. #define R128_IMPLEMENTATION
  9. before you include this file. You may also provide a definition for R128_ASSERT
  10. to force the library to use a custom assert macro.
  11. COMPILER/LIBRARY SUPPORT
  12. ------------------------
  13. This library requires a C89 compiler with support for 64-bit integers. If your
  14. compiler does not support the long long data type, the R128_U64, etc. macros
  15. must be set appropriately. On x86 and x64 targets, Intel intrinsics are used
  16. for speed. If your compiler does not support these intrinsics, you can add
  17. #define R128_STDC_ONLY
  18. in your implementation file before including r128.h.
  19. The only C runtime library functionality used by this library is <assert.h>.
  20. This can be avoided by defining an R128_ASSERT macro in your implementation
  21. file. Since this library uses 64-bit arithmetic, this may implicitly add a
  22. runtime library dependency on 32-bit platforms.
  23. C++ SUPPORT
  24. -----------
  25. Operator overloads are supplied for C++ files that include this file. Since all
  26. C++ functions are declared inline (or static inline), the R128_IMPLEMENTATION
  27. file can be either C++ or C.
  28. LICENSE
  29. -------
  30. This is free and unencumbered software released into the public domain.
  31. Anyone is free to copy, modify, publish, use, compile, sell, or
  32. distribute this software, either in source code form or as a compiled
  33. binary, for any purpose, commercial or non-commercial, and by any
  34. means.
  35. In jurisdictions that recognize copyright laws, the author or authors
  36. of this software dedicate any and all copyright interest in the
  37. software to the public domain. We make this dedication for the benefit
  38. of the public at large and to the detriment of our heirs and
  39. successors. We intend this dedication to be an overt act of
  40. relinquishment in perpetuity of all present and future rights to this
  41. software under copyright law.
  42. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  43. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  44. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  45. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  46. OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  47. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  48. OTHER DEALINGS IN THE SOFTWARE.
  49. */
  50. #ifndef H_R128_H
  51. #define H_R128_H
  52. #include <stddef.h>
  53. // 64-bit integer support
  54. // If your compiler does not have stdint.h, add appropriate defines for these macros.
  55. #if defined(_MSC_VER) && (_MSC_VER < 1600)
  56. # define R128_S32 __int32
  57. # define R128_U32 unsigned __int32
  58. # define R128_S64 __int64
  59. # define R128_U64 unsigned __int64
  60. # define R128_LIT_S64(x) x##i64
  61. # define R128_LIT_U64(x) x##ui64
  62. #else
  63. # include <stdint.h>
  64. # define R128_S32 int32_t
  65. # define R128_U32 uint32_t
  66. # define R128_S64 long long
  67. # define R128_U64 unsigned long long
  68. # define R128_LIT_S64(x) x##ll
  69. # define R128_LIT_U64(x) x##ull
  70. #endif
  71. #ifdef __cplusplus
  72. extern "C" {
  73. #endif
  74. typedef struct R128 {
  75. R128_U64 lo;
  76. R128_U64 hi;
  77. #ifdef __cplusplus
  78. R128();
  79. R128(double);
  80. R128(int);
  81. R128(R128_S64);
  82. R128(R128_U64 low, R128_U64 high);
  83. operator double() const;
  84. operator R128_S64() const;
  85. operator int() const;
  86. operator bool() const;
  87. bool operator!() const;
  88. R128 operator~() const;
  89. R128 operator-() const;
  90. R128 &operator|=(const R128 &rhs);
  91. R128 &operator&=(const R128 &rhs);
  92. R128 &operator^=(const R128 &rhs);
  93. R128 &operator+=(const R128 &rhs);
  94. R128 &operator-=(const R128 &rhs);
  95. R128 &operator*=(const R128 &rhs);
  96. R128 &operator/=(const R128 &rhs);
  97. R128 &operator%=(const R128 &rhs);
  98. R128 &operator<<=(int amount);
  99. R128 &operator>>=(int amount);
  100. #endif //__cplusplus
  101. } R128;
  102. // Type conversion
  103. extern void r128FromInt(R128 *dst, R128_S64 v);
  104. extern void r128FromFloat(R128 *dst, double v);
  105. extern R128_S64 r128ToInt(const R128 *v);
  106. extern double r128ToFloat(const R128 *v);
  107. // Copy
  108. extern void r128Copy(R128 *dst, const R128 *src);
  109. // Sign manipulation
  110. extern void r128Neg(R128 *dst, const R128 *v); // -v
  111. extern void r128Abs(R128* dst, const R128* v); // abs(v)
  112. extern void r128Nabs(R128* dst, const R128* v); // -abs(v)
  113. // Bitwise operations
  114. extern void r128Not(R128 *dst, const R128 *src); // ~a
  115. extern void r128Or(R128 *dst, const R128 *a, const R128 *b); // a | b
  116. extern void r128And(R128 *dst, const R128 *a, const R128 *b); // a & b
  117. extern void r128Xor(R128 *dst, const R128 *a, const R128 *b); // a ^ b
  118. extern void r128Shl(R128 *dst, const R128 *src, int amount); // shift left by amount mod 128
  119. extern void r128Shr(R128 *dst, const R128 *src, int amount); // shift right logical by amount mod 128
  120. extern void r128Sar(R128 *dst, const R128 *src, int amount); // shift right arithmetic by amount mod 128
  121. // Arithmetic
  122. extern void r128Add(R128 *dst, const R128 *a, const R128 *b); // a + b
  123. extern void r128Sub(R128 *dst, const R128 *a, const R128 *b); // a - b
  124. extern void r128Mul(R128 *dst, const R128 *a, const R128 *b); // a * b
  125. extern void r128Div(R128 *dst, const R128 *a, const R128 *b); // a / b
  126. extern void r128Mod(R128 *dst, const R128 *a, const R128 *b); // a - toInt(a / b) * b
  127. extern void r128Sqrt(R128 *dst, const R128 *v); // sqrt(v)
  128. extern void r128Rsqrt(R128 *dst, const R128 *v); // 1 / sqrt(v)
  129. // Comparison
  130. extern int r128Cmp(const R128 *a, const R128 *b); // sign of a-b
  131. extern void r128Min(R128 *dst, const R128 *a, const R128 *b);
  132. extern void r128Max(R128 *dst, const R128 *a, const R128 *b);
  133. extern void r128Floor(R128 *dst, const R128 *v);
  134. extern void r128Ceil(R128 *dst, const R128 *v);
  135. extern void r128Round(R128 *dst, const R128 *v); // round to nearest, rounding halfway values away from zero
  136. extern int r128IsNeg(const R128 *v); // quick check for < 0
  137. // String conversion
  138. //
  139. typedef enum R128ToStringSign {
  140. R128ToStringSign_Default, // no sign character for positive values
  141. R128ToStringSign_Space, // leading space for positive values
  142. R128ToStringSign_Plus, // leading '+' for positive values
  143. } R128ToStringSign;
  144. // Formatting options for use with r128ToStringOpt. The "defaults" correspond
  145. // to a format string of "%f".
  146. //
  147. typedef struct R128ToStringFormat {
  148. // sign character for positive values. Default is R128ToStringSign_Default.
  149. R128ToStringSign sign;
  150. // minimum number of characters to write. Default is 0.
  151. int width;
  152. // place to the right of the decimal at which rounding is performed. If negative,
  153. // a maximum of 20 decimal places will be written, with no trailing zeroes.
  154. // (20 places is sufficient to ensure that r128FromString will convert back to the
  155. // original value.) Default is -1. NOTE: This is not the same default that the C
  156. // standard library uses for %f.
  157. int precision;
  158. // If non-zero, pads the output string with leading zeroes if the final result is
  159. // fewer than width characters. Otherwise, leading spaces are used. Default is 0.
  160. int zeroPad;
  161. // Always print a decimal point, even if the value is an integer. Default is 0.
  162. int decimal;
  163. // Left-align output if width specifier requires padding.
  164. // Default is 0 (right align).
  165. int leftAlign;
  166. } R128ToStringFormat;
  167. // r128ToStringOpt: convert R128 to a decimal string, with formatting.
  168. //
  169. // dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
  170. // (including null terminator). No additional rounding is performed if dstSize is not large
  171. // enough to hold the entire string.
  172. //
  173. // opt: an R128ToStringFormat struct (q.v.) with formatting options.
  174. //
  175. // Uses the R128_decimal global as the decimal point character.
  176. // Always writes a null terminator, even if the destination buffer is not large enough.
  177. //
  178. // Number of bytes that will be written (i.e. how big does dst need to be?):
  179. // If width is specified: width + 1 bytes.
  180. // If precision is specified: at most precision + 22 bytes.
  181. // If neither is specified: at most 42 bytes.
  182. //
  183. // Returns the number of bytes that would have been written if dst was sufficiently large,
  184. // not including the final null terminator.
  185. //
  186. extern int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt);
  187. // r128ToStringf: convert R128 to a decimal string, with formatting.
  188. //
  189. // dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
  190. // (including null terminator).
  191. //
  192. // format: a printf-style format specifier, as one would use with floating point types.
  193. // e.g. "%+5.2f". (The leading % and trailing f are optional.)
  194. // NOTE: This is NOT a full replacement for sprintf. Any characters in the format string
  195. // that do not correspond to a format placeholder are ignored.
  196. //
  197. // Uses the R128_decimal global as the decimal point character.
  198. // Always writes a null terminator, even if the destination buffer is not large enough.
  199. //
  200. // Number of bytes that will be written (i.e. how big does dst need to be?):
  201. // If the precision field is specified: at most max(width, precision + 21) + 1 bytes
  202. // Otherwise: at most max(width, 41) + 1 bytes.
  203. //
  204. // Returns the number of bytes that would have been written if dst was sufficiently large,
  205. // not including the final null terminator.
  206. //
  207. extern int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v);
  208. // r128ToString: convert R128 to a decimal string, with default formatting.
  209. // Equivalent to r128ToStringf(dst, dstSize, "%f", v).
  210. //
  211. // Uses the R128_decimal global as the decimal point character.
  212. // Always writes a null terminator, even if the destination buffer is not large enough.
  213. //
  214. // Will write at most 42 bytes (including NUL) to dst.
  215. //
  216. // Returns the number of bytes that would have been written if dst was sufficiently large,
  217. // not including the final null terminator.
  218. //
  219. extern int r128ToString(char *dst, size_t dstSize, const R128 *v);
  220. // r128FromString: Convert string to R128.
  221. //
  222. // The string can be formatted either as a decimal number with optional sign
  223. // or as hexadecimal with a prefix of 0x or 0X.
  224. //
  225. // endptr, if not NULL, is set to the character following the last character
  226. // used in the conversion.
  227. //
  228. extern void r128FromString(R128 *dst, const char *s, char **endptr);
  229. // Constants
  230. extern const R128 R128_min; // minimum (most negative) value
  231. extern const R128 R128_max; // maximum (most positive) value
  232. extern const R128 R128_smallest; // smallest positive value
  233. extern const R128 R128_zero; // zero
  234. extern const R128 R128_one; // 1.0
  235. extern char R128_decimal; // decimal point character used by r128From/ToString. defaults to '.'
  236. #ifdef __cplusplus
  237. }
  238. #include <limits>
  239. namespace std {
  240. template<>
  241. struct numeric_limits<R128>
  242. {
  243. static const bool is_specialized = true;
  244. static R128 min() throw() { return R128_min; }
  245. static R128 max() throw() { return R128_max; }
  246. static const int digits = 127;
  247. static const int digits10 = 38;
  248. static const bool is_signed = true;
  249. static const bool is_integer = false;
  250. static const bool is_exact = false;
  251. static const int radix = 2;
  252. static R128 epsilon() throw() { return R128_smallest; }
  253. static R128 round_error() throw() { return R128_one; }
  254. static const int min_exponent = 0;
  255. static const int min_exponent10 = 0;
  256. static const int max_exponent = 0;
  257. static const int max_exponent10 = 0;
  258. static const bool has_infinity = false;
  259. static const bool has_quiet_NaN = false;
  260. static const bool has_signaling_NaN = false;
  261. static const float_denorm_style has_denorm = denorm_absent;
  262. static const bool has_denorm_loss = false;
  263. static R128 infinity() throw() { return R128_zero; }
  264. static R128 quiet_NaN() throw() { return R128_zero; }
  265. static R128 signaling_NaN() throw() { return R128_zero; }
  266. static R128 denorm_min() throw() { return R128_zero; }
  267. static const bool is_iec559 = false;
  268. static const bool is_bounded = true;
  269. static const bool is_modulo = true;
  270. static const bool traps = numeric_limits<R128_U64>::traps;
  271. static const bool tinyness_before = false;
  272. static const float_round_style round_style = round_toward_zero;
  273. };
  274. } //namespace std
  275. inline R128::R128() {}
  276. inline R128::R128(double v)
  277. {
  278. r128FromFloat(this, v);
  279. }
  280. inline R128::R128(int v)
  281. {
  282. r128FromInt(this, v);
  283. }
  284. inline R128::R128(R128_S64 v)
  285. {
  286. r128FromInt(this, v);
  287. }
  288. inline R128::R128(R128_U64 low, R128_U64 high)
  289. {
  290. lo = low;
  291. hi = high;
  292. }
  293. inline R128::operator double() const
  294. {
  295. return r128ToFloat(this);
  296. }
  297. inline R128::operator R128_S64() const
  298. {
  299. return r128ToInt(this);
  300. }
  301. inline R128::operator int() const
  302. {
  303. return (int) r128ToInt(this);
  304. }
  305. inline R128::operator bool() const
  306. {
  307. return lo || hi;
  308. }
  309. inline bool R128::operator!() const
  310. {
  311. return !lo && !hi;
  312. }
  313. inline R128 R128::operator~() const
  314. {
  315. R128 r;
  316. r128Not(&r, this);
  317. return r;
  318. }
  319. inline R128 R128::operator-() const
  320. {
  321. R128 r;
  322. r128Neg(&r, this);
  323. return r;
  324. }
  325. inline R128 &R128::operator|=(const R128 &rhs)
  326. {
  327. r128Or(this, this, &rhs);
  328. return *this;
  329. }
  330. inline R128 &R128::operator&=(const R128 &rhs)
  331. {
  332. r128And(this, this, &rhs);
  333. return *this;
  334. }
  335. inline R128 &R128::operator^=(const R128 &rhs)
  336. {
  337. r128Xor(this, this, &rhs);
  338. return *this;
  339. }
  340. inline R128 &R128::operator+=(const R128 &rhs)
  341. {
  342. r128Add(this, this, &rhs);
  343. return *this;
  344. }
  345. inline R128 &R128::operator-=(const R128 &rhs)
  346. {
  347. r128Sub(this, this, &rhs);
  348. return *this;
  349. }
  350. inline R128 &R128::operator*=(const R128 &rhs)
  351. {
  352. r128Mul(this, this, &rhs);
  353. return *this;
  354. }
  355. inline R128 &R128::operator/=(const R128 &rhs)
  356. {
  357. r128Div(this, this, &rhs);
  358. return *this;
  359. }
  360. inline R128 &R128::operator%=(const R128 &rhs)
  361. {
  362. r128Mod(this, this, &rhs);
  363. return *this;
  364. }
  365. inline R128 &R128::operator<<=(int amount)
  366. {
  367. r128Shl(this, this, amount);
  368. return *this;
  369. }
  370. inline R128 &R128::operator>>=(int amount)
  371. {
  372. r128Sar(this, this, amount);
  373. return *this;
  374. }
  375. static inline R128 operator|(const R128 &lhs, const R128 &rhs)
  376. {
  377. R128 r(lhs);
  378. return r |= rhs;
  379. }
  380. static inline R128 operator&(const R128 &lhs, const R128 &rhs)
  381. {
  382. R128 r(lhs);
  383. return r &= rhs;
  384. }
  385. static inline R128 operator^(const R128 &lhs, const R128 &rhs)
  386. {
  387. R128 r(lhs);
  388. return r ^= rhs;
  389. }
  390. static inline R128 operator+(const R128 &lhs, const R128 &rhs)
  391. {
  392. R128 r(lhs);
  393. return r += rhs;
  394. }
  395. static inline R128 operator-(const R128 &lhs, const R128 &rhs)
  396. {
  397. R128 r(lhs);
  398. return r -= rhs;
  399. }
  400. static inline R128 operator*(const R128 &lhs, const R128 &rhs)
  401. {
  402. R128 r(lhs);
  403. return r *= rhs;
  404. }
  405. static inline R128 operator/(const R128 &lhs, const R128 &rhs)
  406. {
  407. R128 r(lhs);
  408. return r /= rhs;
  409. }
  410. static inline R128 operator%(const R128 &lhs, const R128 &rhs)
  411. {
  412. R128 r(lhs);
  413. return r %= rhs;
  414. }
  415. static inline R128 operator<<(const R128 &lhs, int amount)
  416. {
  417. R128 r(lhs);
  418. return r <<= amount;
  419. }
  420. static inline R128 operator>>(const R128 &lhs, int amount)
  421. {
  422. R128 r(lhs);
  423. return r >>= amount;
  424. }
  425. static inline bool operator<(const R128 &lhs, const R128 &rhs)
  426. {
  427. return r128Cmp(&lhs, &rhs) < 0;
  428. }
  429. static inline bool operator>(const R128 &lhs, const R128 &rhs)
  430. {
  431. return r128Cmp(&lhs, &rhs) > 0;
  432. }
  433. static inline bool operator<=(const R128 &lhs, const R128 &rhs)
  434. {
  435. return r128Cmp(&lhs, &rhs) <= 0;
  436. }
  437. static inline bool operator>=(const R128 &lhs, const R128 &rhs)
  438. {
  439. return r128Cmp(&lhs, &rhs) >= 0;
  440. }
  441. static inline bool operator==(const R128 &lhs, const R128 &rhs)
  442. {
  443. return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
  444. }
  445. static inline bool operator!=(const R128 &lhs, const R128 &rhs)
  446. {
  447. return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
  448. }
  449. #endif //__cplusplus
  450. #endif //H_R128_H
  451. #ifdef R128_IMPLEMENTATION
  452. #ifdef R128_DEBUG_VIS
  453. # define R128_DEBUG_SET(x) r128ToString(R128_last, sizeof(R128_last), x)
  454. #else
  455. # define R128_DEBUG_SET(x)
  456. #endif
  457. #define R128_SET2(x, l, h) do { (x)->lo = (R128_U64)(l); (x)->hi = (R128_U64)(h); } while(0)
  458. #define R128_R0(x) ((R128_U32)(x)->lo)
  459. #define R128_R2(x) ((R128_U32)(x)->hi)
  460. #if defined(_M_IX86)
  461. // workaround: MSVC x86's handling of 64-bit values is not great
  462. # define R128_SET4(x, r0, r1, r2, r3) do { \
  463. ((R128_U32*)&(x)->lo)[0] = (R128_U32)(r0); \
  464. ((R128_U32*)&(x)->lo)[1] = (R128_U32)(r1); \
  465. ((R128_U32*)&(x)->hi)[0] = (R128_U32)(r2); \
  466. ((R128_U32*)&(x)->hi)[1] = (R128_U32)(r3); \
  467. } while(0)
  468. # define R128_R1(x) (((R128_U32*)&(x)->lo)[1])
  469. # define R128_R3(x) (((R128_U32*)&(x)->hi)[1])
  470. #else
  471. # define R128_SET4(x, r0, r1, r2, r3) do { (x)->lo = (R128_U64)(r0) | ((R128_U64)(r1) << 32); \
  472. (x)->hi = (R128_U64)(r2) | ((R128_U64)(r3) << 32); } while(0)
  473. # define R128_R1(x) ((R128_U32)((x)->lo >> 32))
  474. # define R128_R3(x) ((R128_U32)((x)->hi >> 32))
  475. #endif
  476. #if defined(_M_X64)
  477. # define R128_INTEL 1
  478. # define R128_64BIT 1
  479. # ifndef R128_STDC_ONLY
  480. # include <intrin.h>
  481. # endif
  482. #elif defined(__x86_64__)
  483. # define R128_INTEL 1
  484. # define R128_64BIT 1
  485. # ifndef R128_STDC_ONLY
  486. # include <x86intrin.h>
  487. # endif
  488. #elif defined(_M_IX86)
  489. # define R128_INTEL 1
  490. # ifndef R128_STDC_ONLY
  491. # include <intrin.h>
  492. # endif
  493. #elif defined(__i386__)
  494. # define R128_INTEL 1
  495. # ifndef R128_STDC_ONLY
  496. # include <x86intrin.h>
  497. # endif
  498. #elif defined(_M_ARM)
  499. # ifndef R128_STDC_ONLY
  500. # include <intrin.h>
  501. # endif
  502. #elif defined(_M_ARM64)
  503. # define R128_64BIT 1
  504. # ifndef R128_STDC_ONLY
  505. # include <intrin.h>
  506. # endif
  507. #elif defined(__aarch64__)
  508. # define R128_64BIT 1
  509. #endif
  510. #ifndef R128_INTEL
  511. # define R128_INTEL 0
  512. #endif
  513. #ifndef R128_64BIT
  514. # define R128_64BIT 0
  515. #endif
  516. #ifndef R128_ASSERT
  517. # include <assert.h>
  518. # define R128_ASSERT(x) assert(x)
  519. #endif
  520. #include <stdlib.h> // for NULL
  521. static const R128ToStringFormat R128__defaultFormat = {
  522. R128ToStringSign_Default,
  523. 0,
  524. -1,
  525. 0,
  526. 0,
  527. 0
  528. };
  529. const R128 R128_min = { 0, R128_LIT_U64(0x8000000000000000) };
  530. const R128 R128_max = { R128_LIT_U64(0xffffffffffffffff), R128_LIT_U64(0x7fffffffffffffff) };
  531. const R128 R128_smallest = { 1, 0 };
  532. const R128 R128_zero = { 0, 0 };
  533. const R128 R128_one = { 0, 1 };
  534. char R128_decimal = '.';
  535. #ifdef R128_DEBUG_VIS
  536. char R128_last[42];
  537. #endif
  538. static int r128__clz64(R128_U64 x)
  539. {
  540. #if defined(R128_STDC_ONLY)
  541. R128_U64 n = 64, y;
  542. y = x >> 32; if (y) { n -= 32; x = y; }
  543. y = x >> 16; if (y) { n -= 16; x = y; }
  544. y = x >> 8; if (y) { n -= 8; x = y; }
  545. y = x >> 4; if (y) { n -= 4; x = y; }
  546. y = x >> 2; if (y) { n -= 2; x = y; }
  547. y = x >> 1; if (y) { n -= 1; x = y; }
  548. return (int)(n - x);
  549. #elif defined(_M_X64) || defined(_M_ARM64)
  550. unsigned long idx;
  551. if (_BitScanReverse64(&idx, x)) {
  552. return 63 - (int)idx;
  553. } else {
  554. return 64;
  555. }
  556. #elif defined(_MSC_VER)
  557. unsigned long idx;
  558. if (_BitScanReverse(&idx, (R128_U32)(x >> 32))) {
  559. return 31 - (int)idx;
  560. } else if (_BitScanReverse(&idx, (R128_U32)x)) {
  561. return 63 - (int)idx;
  562. } else {
  563. return 64;
  564. }
  565. #else
  566. return x ? __builtin_clzll(x) : 64;
  567. #endif
  568. }
  569. #if !R128_64BIT
  570. // 32*32->64
  571. static R128_U64 r128__umul64(R128_U32 a, R128_U32 b)
  572. {
  573. # if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  574. return __emulu(a, b);
  575. # elif defined(_M_ARM) && !defined(R128_STDC_ONLY)
  576. return _arm_umull(a, b);
  577. # else
  578. return a * (R128_U64)b;
  579. # endif
  580. }
  581. // 64/32->32
  582. static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
  583. {
  584. # if defined(_M_IX86) && (_MSC_VER >= 1920) && !defined(R128_STDC_ONLY)
  585. unsigned __int64 n = ((unsigned __int64)nhi << 32) | nlo;
  586. return _udiv64(n, d, rem);
  587. # elif defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  588. __asm {
  589. mov eax, nlo
  590. mov edx, nhi
  591. div d
  592. mov ecx, rem
  593. mov dword ptr [ecx], edx
  594. }
  595. # elif defined(__i386__) && !defined(R128_STDC_ONLY)
  596. R128_U32 q, r;
  597. __asm("divl %4"
  598. : "=a"(q), "=d"(r)
  599. : "a"(nlo), "d"(nhi), "X"(d));
  600. *rem = r;
  601. return q;
  602. # else
  603. R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
  604. *rem = (R128_U32)(n64 % d);
  605. return (R128_U32)(n64 / d);
  606. # endif
  607. }
  608. #elif defined(R128_STDC_ONLY) || !R128_INTEL
  609. #define r128__umul64(a, b) ((a) * (R128_U64)(b))
  610. static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
  611. {
  612. R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
  613. *rem = (R128_U32)(n64 % d);
  614. return (R128_U32)(n64 / d);
  615. }
  616. #endif //!R128_64BIT
  617. static void r128__neg(R128 *dst, const R128 *src)
  618. {
  619. R128_ASSERT(dst != NULL);
  620. R128_ASSERT(src != NULL);
  621. #if R128_INTEL && !defined(R128_STDC_ONLY)
  622. {
  623. unsigned char carry = 0;
  624. # if R128_64BIT
  625. carry = _addcarry_u64(carry, ~src->lo, 1, &dst->lo);
  626. carry = _addcarry_u64(carry, ~src->hi, 0, &dst->hi);
  627. # else
  628. R128_U32 r0, r1, r2, r3;
  629. carry = _addcarry_u32(carry, ~R128_R0(src), 1, &r0);
  630. carry = _addcarry_u32(carry, ~R128_R1(src), 0, &r1);
  631. carry = _addcarry_u32(carry, ~R128_R2(src), 0, &r2);
  632. carry = _addcarry_u32(carry, ~R128_R3(src), 0, &r3);
  633. R128_SET4(dst, r0, r1, r2, r3);
  634. # endif //R128_64BIT
  635. }
  636. #else
  637. if (src->lo) {
  638. dst->lo = ~src->lo + 1;
  639. dst->hi = ~src->hi;
  640. } else {
  641. dst->lo = 0;
  642. dst->hi = ~src->hi + 1;
  643. }
  644. #endif //R128_INTEL
  645. }
  646. // 64*64->128
  647. static void r128__umul128(R128 *dst, R128_U64 a, R128_U64 b)
  648. {
  649. #if defined(_M_X64) && !defined(R128_STDC_ONLY)
  650. dst->lo = _umul128(a, b, &dst->hi);
  651. #elif R128_64BIT && !defined(_MSC_VER) && !defined(R128_STDC_ONLY)
  652. unsigned __int128 p0 = a * (unsigned __int128)b;
  653. dst->hi = (R128_U64)(p0 >> 64);
  654. dst->lo = (R128_U64)p0;
  655. #else
  656. R128_U32 alo = (R128_U32)a;
  657. R128_U32 ahi = (R128_U32)(a >> 32);
  658. R128_U32 blo = (R128_U32)b;
  659. R128_U32 bhi = (R128_U32)(b >> 32);
  660. R128_U64 p0, p1, p2, p3;
  661. p0 = r128__umul64(alo, blo);
  662. p1 = r128__umul64(alo, bhi);
  663. p2 = r128__umul64(ahi, blo);
  664. p3 = r128__umul64(ahi, bhi);
  665. {
  666. #if R128_INTEL && !defined(R128_STDC_ONLY)
  667. R128_U32 r0, r1, r2, r3;
  668. unsigned char carry;
  669. r0 = (R128_U32)(p0);
  670. r1 = (R128_U32)(p0 >> 32);
  671. r2 = (R128_U32)(p1 >> 32);
  672. r3 = (R128_U32)(p3 >> 32);
  673. carry = _addcarry_u32(0, r1, (R128_U32)p1, &r1);
  674. carry = _addcarry_u32(carry, r2, (R128_U32)(p2 >> 32), &r2);
  675. _addcarry_u32(carry, r3, 0, &r3);
  676. carry = _addcarry_u32(0, r1, (R128_U32)p2, &r1);
  677. carry = _addcarry_u32(carry, r2, (R128_U32)p3, &r2);
  678. _addcarry_u32(carry, r3, 0, &r3);
  679. R128_SET4(dst, r0, r1, r2, r3);
  680. #else
  681. R128_U64 carry, lo, hi;
  682. carry = ((R128_U64)(R128_U32)p1 + (R128_U64)(R128_U32)p2 + (p0 >> 32)) >> 32;
  683. lo = p0 + ((p1 + p2) << 32);
  684. hi = p3 + ((R128_U32)(p1 >> 32) + (R128_U32)(p2 >> 32)) + carry;
  685. R128_SET2(dst, lo, hi);
  686. #endif
  687. }
  688. #endif
  689. }
  690. // 128/64->64
  691. #if defined(_M_X64) && (_MSC_VER < 1920) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  692. // MSVC x64 provides neither inline assembly nor (pre-2019) a div intrinsic, so we do fake
  693. // "inline assembly" to avoid long division or outline assembly.
  694. #pragma code_seg(".text")
  695. __declspec(allocate(".text") align(16)) static const unsigned char r128__udiv128Code[] = {
  696. 0x48, 0x8B, 0xC1, //mov rax, rcx
  697. 0x49, 0xF7, 0xF0, //div rax, r8
  698. 0x49, 0x89, 0x11, //mov qword ptr [r9], rdx
  699. 0xC3 //ret
  700. };
  701. typedef R128_U64 (*r128__udiv128Proc)(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem);
  702. static const r128__udiv128Proc r128__udiv128 = (r128__udiv128Proc)(void*)r128__udiv128Code;
  703. #else
  704. static R128_U64 r128__udiv128(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem)
  705. {
  706. #if defined(_M_X64) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  707. return _udiv128(nhi, nlo, d, rem);
  708. #elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
  709. R128_U64 q, r;
  710. __asm("divq %4"
  711. : "=a"(q), "=d"(r)
  712. : "a"(nlo), "d"(nhi), "X"(d));
  713. *rem = r;
  714. return q;
  715. #else
  716. R128_U64 tmp;
  717. R128_U32 d0, d1;
  718. R128_U32 n3, n2, n1, n0;
  719. R128_U32 q0, q1;
  720. R128_U32 r;
  721. int shift;
  722. R128_ASSERT(d != 0); //division by zero
  723. R128_ASSERT(nhi < d); //overflow
  724. // normalize
  725. shift = r128__clz64(d);
  726. if (shift) {
  727. R128 tmp128;
  728. R128_SET2(&tmp128, nlo, nhi);
  729. r128Shl(&tmp128, &tmp128, shift);
  730. n3 = R128_R3(&tmp128);
  731. n2 = R128_R2(&tmp128);
  732. n1 = R128_R1(&tmp128);
  733. n0 = R128_R0(&tmp128);
  734. d <<= shift;
  735. } else {
  736. n3 = (R128_U32)(nhi >> 32);
  737. n2 = (R128_U32)nhi;
  738. n1 = (R128_U32)(nlo >> 32);
  739. n0 = (R128_U32)nlo;
  740. }
  741. d1 = (R128_U32)(d >> 32);
  742. d0 = (R128_U32)d;
  743. // first digit
  744. R128_ASSERT(n3 <= d1);
  745. if (n3 < d1) {
  746. q1 = r128__udiv64(n2, n3, d1, &r);
  747. } else {
  748. q1 = 0xffffffffu;
  749. r = n2 + d1;
  750. }
  751. refine1:
  752. if (r128__umul64(q1, d0) > ((R128_U64)r << 32) + n1) {
  753. --q1;
  754. if (r < ~d1 + 1) {
  755. r += d1;
  756. goto refine1;
  757. }
  758. }
  759. tmp = ((R128_U64)n2 << 32) + n1 - (r128__umul64(q1, d0) + (r128__umul64(q1, d1) << 32));
  760. n2 = (R128_U32)(tmp >> 32);
  761. n1 = (R128_U32)tmp;
  762. // second digit
  763. R128_ASSERT(n2 <= d1);
  764. if (n2 < d1) {
  765. q0 = r128__udiv64(n1, n2, d1, &r);
  766. } else {
  767. q0 = 0xffffffffu;
  768. r = n1 + d1;
  769. }
  770. refine0:
  771. if (r128__umul64(q0, d0) > ((R128_U64)r << 32) + n0) {
  772. --q0;
  773. if (r < ~d1 + 1) {
  774. r += d1;
  775. goto refine0;
  776. }
  777. }
  778. tmp = ((R128_U64)n1 << 32) + n0 - (r128__umul64(q0, d0) + (r128__umul64(q0, d1) << 32));
  779. n1 = (R128_U32)(tmp >> 32);
  780. n0 = (R128_U32)tmp;
  781. *rem = (((R128_U64)n1 << 32) + n0) >> shift;
  782. return ((R128_U64)q1 << 32) + q0;
  783. #endif
  784. }
  785. #endif
  786. static int r128__ucmp(const R128 *a, const R128 *b)
  787. {
  788. if (a->hi != b->hi) {
  789. if (a->hi > b->hi) {
  790. return 1;
  791. } else {
  792. return -1;
  793. }
  794. } else {
  795. if (a->lo == b->lo) {
  796. return 0;
  797. } else if (a->lo > b->lo) {
  798. return 1;
  799. } else {
  800. return -1;
  801. }
  802. }
  803. }
  804. static void r128__umul(R128 *dst, const R128 *a, const R128 *b)
  805. {
  806. #if defined(_M_X64) && !defined(R128_STDC_ONLY)
  807. R128_U64 t0, t1;
  808. R128_U64 lo, hi = 0;
  809. unsigned char carry;
  810. t0 = _umul128(a->lo, b->lo, &t1);
  811. carry = _addcarry_u64(0, t1, t0 >> 63, &lo);
  812. _addcarry_u64(carry, hi, hi, &hi);
  813. t0 = _umul128(a->lo, b->hi, &t1);
  814. carry = _addcarry_u64(0, lo, t0, &lo);
  815. _addcarry_u64(carry, hi, t1, &hi);
  816. t0 = _umul128(a->hi, b->lo, &t1);
  817. carry = _addcarry_u64(0, lo, t0, &lo);
  818. _addcarry_u64(carry, hi, t1, &hi);
  819. t0 = _umul128(a->hi, b->hi, &t1);
  820. hi += t0;
  821. R128_SET2(dst, lo, hi);
  822. #elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
  823. unsigned __int128 p0, p1, p2, p3;
  824. p0 = a->lo * (unsigned __int128)b->lo;
  825. p1 = a->lo * (unsigned __int128)b->hi;
  826. p2 = a->hi * (unsigned __int128)b->lo;
  827. p3 = a->hi * (unsigned __int128)b->hi;
  828. p0 = (p3 << 64) + p2 + p1 + (p0 >> 64) + ((R128_U64)p0 >> 63);
  829. dst->lo = (R128_U64)p0;
  830. dst->hi = (R128_U64)(p0 >> 64);
  831. #else
  832. R128 p0, p1, p2, p3, round;
  833. r128__umul128(&p0, a->lo, b->lo);
  834. round.hi = 0; round.lo = p0.lo >> 63;
  835. p0.lo = p0.hi; p0.hi = 0; //r128Shr(&p0, &p0, 64);
  836. r128Add(&p0, &p0, &round);
  837. r128__umul128(&p1, a->hi, b->lo);
  838. r128Add(&p0, &p0, &p1);
  839. r128__umul128(&p2, a->lo, b->hi);
  840. r128Add(&p0, &p0, &p2);
  841. r128__umul128(&p3, a->hi, b->hi);
  842. p3.hi = p3.lo; p3.lo = 0; //r128Shl(&p3, &p3, 64);
  843. r128Add(&p0, &p0, &p3);
  844. R128_SET2(dst, p0.lo, p0.hi);
  845. #endif
  846. }
  847. // Shift d left until the high bit is set, and shift n left by the same amount.
  848. // returns non-zero on overflow.
  849. static int r128__norm(R128 *n, R128 *d, R128_U64 *n2)
  850. {
  851. R128_U64 d0, d1;
  852. R128_U64 n0, n1;
  853. int shift;
  854. d1 = d->hi;
  855. d0 = d->lo;
  856. n1 = n->hi;
  857. n0 = n->lo;
  858. if (d1) {
  859. shift = r128__clz64(d1);
  860. if (shift) {
  861. d1 = (d1 << shift) | (d0 >> (64 - shift));
  862. d0 = d0 << shift;
  863. *n2 = n1 >> (64 - shift);
  864. n1 = (n1 << shift) | (n0 >> (64 - shift));
  865. n0 = n0 << shift;
  866. } else {
  867. *n2 = 0;
  868. }
  869. } else {
  870. shift = r128__clz64(d0);
  871. if (r128__clz64(n1) <= shift) {
  872. return 1; // overflow
  873. }
  874. if (shift) {
  875. d1 = d0 << shift;
  876. d0 = 0;
  877. *n2 = (n1 << shift) | (n0 >> (64 - shift));
  878. n1 = n0 << shift;
  879. n0 = 0;
  880. } else {
  881. d1 = d0;
  882. d0 = 0;
  883. *n2 = n1;
  884. n1 = n0;
  885. n0 = 0;
  886. }
  887. }
  888. R128_SET2(n, n0, n1);
  889. R128_SET2(d, d0, d1);
  890. return 0;
  891. }
  892. static void r128__udiv(R128 *quotient, const R128 *dividend, const R128 *divisor)
  893. {
  894. R128 tmp;
  895. R128_U64 d0, d1;
  896. R128_U64 n1, n2, n3;
  897. R128 q;
  898. R128_ASSERT(dividend != NULL);
  899. R128_ASSERT(divisor != NULL);
  900. R128_ASSERT(quotient != NULL);
  901. R128_ASSERT(divisor->hi != 0 || divisor->lo != 0); // divide by zero
  902. // scale dividend and normalize
  903. {
  904. R128 n, d;
  905. R128_SET2(&n, dividend->lo, dividend->hi);
  906. R128_SET2(&d, divisor->lo, divisor->hi);
  907. if (r128__norm(&n, &d, &n3)) {
  908. R128_SET2(quotient, R128_max.lo, R128_max.hi);
  909. return;
  910. }
  911. d1 = d.hi;
  912. d0 = d.lo;
  913. n2 = n.hi;
  914. n1 = n.lo;
  915. }
  916. // first digit
  917. R128_ASSERT(n3 <= d1);
  918. {
  919. R128 t0, t1;
  920. t0.lo = n1;
  921. if (n3 < d1) {
  922. q.hi = r128__udiv128(n2, n3, d1, &t0.hi);
  923. } else {
  924. q.hi = R128_LIT_U64(0xffffffffffffffff);
  925. t0.hi = n2 + d1;
  926. }
  927. refine1:
  928. r128__umul128(&t1, q.hi, d0);
  929. if (r128__ucmp(&t1, &t0) > 0) {
  930. --q.hi;
  931. if (t0.hi < ~d1 + 1) {
  932. t0.hi += d1;
  933. goto refine1;
  934. }
  935. }
  936. }
  937. {
  938. R128 t0, t1, t2;
  939. t0.hi = n2;
  940. t0.lo = n1;
  941. r128__umul128(&t1, q.hi, d0);
  942. r128__umul128(&t2, q.hi, d1);
  943. t2.hi = t2.lo; t2.lo = 0; //r128Shl(&t2, &t2, 64);
  944. r128Add(&tmp, &t1, &t2);
  945. r128Sub(&tmp, &t0, &tmp);
  946. }
  947. n2 = tmp.hi;
  948. n1 = tmp.lo;
  949. // second digit
  950. R128_ASSERT(n2 <= d1);
  951. {
  952. R128 t0, t1;
  953. t0.lo = 0;
  954. if (n2 < d1) {
  955. q.lo = r128__udiv128(n1, n2, d1, &t0.hi);
  956. } else {
  957. q.lo = R128_LIT_U64(0xffffffffffffffff);
  958. t0.hi = n1 + d1;
  959. }
  960. refine0:
  961. r128__umul128(&t1, q.lo, d0);
  962. if (r128__ucmp(&t1, &t0) > 0) {
  963. --q.lo;
  964. if (t0.hi < ~d1 + 1) {
  965. t0.hi += d1;
  966. goto refine0;
  967. }
  968. }
  969. }
  970. R128_SET2(quotient, q.lo, q.hi);
  971. }
  972. static R128_U64 r128__umod(R128 *n, R128 *d)
  973. {
  974. R128_U64 d0, d1;
  975. R128_U64 n3, n2, n1;
  976. R128_U64 q;
  977. R128_ASSERT(d != NULL);
  978. R128_ASSERT(n != NULL);
  979. R128_ASSERT(d->hi != 0 || d->lo != 0); // divide by zero
  980. if (r128__norm(n, d, &n3)) {
  981. return R128_LIT_U64(0xffffffffffffffff);
  982. }
  983. d1 = d->hi;
  984. d0 = d->lo;
  985. n2 = n->hi;
  986. n1 = n->lo;
  987. R128_ASSERT(n3 < d1);
  988. {
  989. R128 t0, t1;
  990. t0.lo = n1;
  991. q = r128__udiv128(n2, n3, d1, &t0.hi);
  992. refine1:
  993. r128__umul128(&t1, q, d0);
  994. if (r128__ucmp(&t1, &t0) > 0) {
  995. --q;
  996. if (t0.hi < ~d1 + 1) {
  997. t0.hi += d1;
  998. goto refine1;
  999. }
  1000. }
  1001. }
  1002. return q;
  1003. }
  1004. static int r128__format(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *format)
  1005. {
  1006. char buf[128];
  1007. R128 tmp;
  1008. R128_U64 whole;
  1009. char *cursor, *decimal, *dstp = dst;
  1010. int sign = 0;
  1011. int fullPrecision = 1;
  1012. int width, precision;
  1013. int padCnt, trail = 0;
  1014. R128_ASSERT(dst != NULL && dstSize > 0);
  1015. R128_ASSERT(v != NULL);
  1016. R128_ASSERT(format != NULL);
  1017. --dstSize;
  1018. R128_SET2(&tmp, v->lo, v->hi);
  1019. if (r128IsNeg(&tmp)) {
  1020. r128__neg(&tmp, &tmp);
  1021. sign = 1;
  1022. }
  1023. width = format->width;
  1024. if (width < 0) {
  1025. width = 0;
  1026. }
  1027. precision = format->precision;
  1028. if (precision < 0) {
  1029. // print a maximum of 20 digits
  1030. fullPrecision = 0;
  1031. precision = 20;
  1032. } else if (precision > sizeof(buf) - 21) {
  1033. trail = precision - (sizeof(buf) - 21);
  1034. precision -= trail;
  1035. }
  1036. whole = tmp.hi;
  1037. decimal = cursor = buf;
  1038. // fractional part first in case a carry into the whole part is required
  1039. if (tmp.lo || format->decimal) {
  1040. while (tmp.lo || (fullPrecision && precision)) {
  1041. if ((int)(cursor - buf) == precision) {
  1042. if ((R128_S64)tmp.lo < 0) {
  1043. // round up, propagate carry backwards
  1044. char *c;
  1045. for (c = cursor - 1; c >= buf; --c) {
  1046. char d = ++*c;
  1047. if (d <= '9') {
  1048. goto endfrac;
  1049. } else {
  1050. *c = '0';
  1051. }
  1052. }
  1053. // carry out into the whole part
  1054. whole++;
  1055. }
  1056. break;
  1057. }
  1058. r128__umul128(&tmp, tmp.lo, 10);
  1059. *cursor++ = (char)tmp.hi + '0';
  1060. }
  1061. endfrac:
  1062. if (format->decimal || precision) {
  1063. decimal = cursor;
  1064. *cursor++ = R128_decimal;
  1065. }
  1066. }
  1067. // whole part
  1068. do {
  1069. char digit = (char)(whole % 10);
  1070. whole /= 10;
  1071. *cursor++ = digit + '0';
  1072. } while (whole);
  1073. #define R128__WRITE(c) do { if (dstp < dst + dstSize) *dstp = c; ++dstp; } while(0)
  1074. padCnt = width - (int)(cursor - buf) - 1;
  1075. // left padding
  1076. if (!format->leftAlign) {
  1077. char padChar = format->zeroPad ? '0' : ' ';
  1078. if (format->zeroPad) {
  1079. if (sign) {
  1080. R128__WRITE('-');
  1081. } else if (format->sign == R128ToStringSign_Plus) {
  1082. R128__WRITE('+');
  1083. } else if (format->sign == R128ToStringSign_Space) {
  1084. R128__WRITE(' ');
  1085. } else {
  1086. ++padCnt;
  1087. }
  1088. }
  1089. for (; padCnt > 0; --padCnt) {
  1090. R128__WRITE(padChar);
  1091. }
  1092. }
  1093. if (format->leftAlign || !format->zeroPad) {
  1094. if (sign) {
  1095. R128__WRITE('-');
  1096. } else if (format->sign == R128ToStringSign_Plus) {
  1097. R128__WRITE('+');
  1098. } else if (format->sign == R128ToStringSign_Space) {
  1099. R128__WRITE(' ');
  1100. } else {
  1101. ++padCnt;
  1102. }
  1103. }
  1104. {
  1105. char *i;
  1106. // reverse the whole part
  1107. for (i = cursor - 1; i >= decimal; --i) {
  1108. R128__WRITE(*i);
  1109. }
  1110. // copy the fractional part
  1111. for (i = buf; i < decimal; ++i) {
  1112. R128__WRITE(*i);
  1113. }
  1114. }
  1115. // right padding
  1116. if (format->leftAlign) {
  1117. char padChar = format->zeroPad ? '0' : ' ';
  1118. for (; padCnt > 0; --padCnt) {
  1119. R128__WRITE(padChar);
  1120. }
  1121. }
  1122. // trailing zeroes for very large precision
  1123. while (trail--) {
  1124. R128__WRITE('0');
  1125. }
  1126. #undef R128__WRITE
  1127. if (dstp <= dst + dstSize) {
  1128. *dstp = '\0';
  1129. } else {
  1130. dst[dstSize] = '\0';
  1131. }
  1132. return (int)(dstp - dst);
  1133. }
  1134. void r128FromInt(R128 *dst, R128_S64 v)
  1135. {
  1136. R128_ASSERT(dst != NULL);
  1137. dst->lo = 0;
  1138. dst->hi = (R128_U64)v;
  1139. R128_DEBUG_SET(dst);
  1140. }
  1141. void r128FromFloat(R128 *dst, double v)
  1142. {
  1143. R128_ASSERT(dst != NULL);
  1144. if (v < -9223372036854775808.0) {
  1145. r128Copy(dst, &R128_min);
  1146. } else if (v >= 9223372036854775808.0) {
  1147. r128Copy(dst, &R128_max);
  1148. } else {
  1149. R128 r;
  1150. int sign = 0;
  1151. if (v < 0) {
  1152. v = -v;
  1153. sign = 1;
  1154. }
  1155. r.hi = (R128_U64)(R128_S64)v;
  1156. v -= (R128_S64)v;
  1157. r.lo = (R128_U64)(v * 18446744073709551616.0);
  1158. if (sign) {
  1159. r128__neg(&r, &r);
  1160. }
  1161. r128Copy(dst, &r);
  1162. }
  1163. }
  1164. void r128FromString(R128 *dst, const char *s, char **endptr)
  1165. {
  1166. R128_U64 lo = 0, hi = 0;
  1167. R128_U64 base = 10;
  1168. int sign = 0;
  1169. R128_ASSERT(dst != NULL);
  1170. R128_ASSERT(s != NULL);
  1171. R128_SET2(dst, 0, 0);
  1172. // consume whitespace
  1173. for (;;) {
  1174. if (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' || *s == '\v') {
  1175. ++s;
  1176. } else {
  1177. break;
  1178. }
  1179. }
  1180. // sign
  1181. if (*s == '-') {
  1182. sign = 1;
  1183. ++s;
  1184. } else if (*s == '+') {
  1185. ++s;
  1186. }
  1187. // parse base prefix
  1188. if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
  1189. base = 16;
  1190. s += 2;
  1191. }
  1192. // whole part
  1193. for (;; ++s) {
  1194. R128_U64 digit;
  1195. if ('0' <= *s && *s <= '9') {
  1196. digit = *s - '0';
  1197. } else if (base == 16 && 'a' <= *s && *s <= 'f') {
  1198. digit = *s - 'a' + 10;
  1199. } else if (base == 16 && 'A' <= *s && *s <= 'F') {
  1200. digit = *s - 'A' + 10;
  1201. } else {
  1202. break;
  1203. }
  1204. hi = hi * base + digit;
  1205. }
  1206. // fractional part
  1207. if (*s == R128_decimal) {
  1208. const char *exp = ++s;
  1209. // find the last digit and work backwards
  1210. for (;; ++s) {
  1211. if ('0' <= *s && *s <= '9') {
  1212. } else if (base == 16 && ('a' <= *s && *s <= 'f')) {
  1213. } else if (base == 16 && ('A' <= *s && *s <= 'F')) {
  1214. } else {
  1215. break;
  1216. }
  1217. }
  1218. for (const char *c = s - 1; c >= exp; --c) {
  1219. R128_U64 digit, unused;
  1220. if ('0' <= *c && *c <= '9') {
  1221. digit = *c - '0';
  1222. } else if ('a' <= *c && *c <= 'f') {
  1223. digit = *c - 'a' + 10;
  1224. } else {
  1225. digit = *c - 'A' + 10;
  1226. }
  1227. lo = r128__udiv128(lo, digit, base, &unused);
  1228. }
  1229. }
  1230. R128_SET2(dst, lo, hi);
  1231. if (sign) {
  1232. r128__neg(dst, dst);
  1233. }
  1234. if (endptr) {
  1235. *endptr = (char *) s;
  1236. }
  1237. }
  1238. R128_S64 r128ToInt(const R128 *v)
  1239. {
  1240. R128_ASSERT(v != NULL);
  1241. if ((R128_S64)v->hi < 0) {
  1242. return (R128_S64)v->hi + (v->lo != 0);
  1243. } else {
  1244. return (R128_S64)v->hi;
  1245. }
  1246. }
  1247. double r128ToFloat(const R128 *v)
  1248. {
  1249. R128 tmp;
  1250. int sign = 0;
  1251. double d;
  1252. R128_ASSERT(v != NULL);
  1253. R128_SET2(&tmp, v->lo, v->hi);
  1254. if (r128IsNeg(&tmp)) {
  1255. r128__neg(&tmp, &tmp);
  1256. sign = 1;
  1257. }
  1258. d = tmp.hi + tmp.lo * (1 / 18446744073709551616.0);
  1259. if (sign) {
  1260. d = -d;
  1261. }
  1262. return d;
  1263. }
  1264. int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt)
  1265. {
  1266. return r128__format(dst, dstSize, v, opt);
  1267. }
  1268. int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v)
  1269. {
  1270. R128ToStringFormat opts;
  1271. R128_ASSERT(dst != NULL && dstSize);
  1272. R128_ASSERT(format != NULL);
  1273. R128_ASSERT(v != NULL);
  1274. opts.sign = R128__defaultFormat.sign;
  1275. opts.precision = R128__defaultFormat.precision;
  1276. opts.zeroPad = R128__defaultFormat.zeroPad;
  1277. opts.decimal = R128__defaultFormat.decimal;
  1278. opts.leftAlign = R128__defaultFormat.leftAlign;
  1279. if (*format == '%') {
  1280. ++format;
  1281. }
  1282. // flags field
  1283. for (;; ++format) {
  1284. if (*format == ' ' && opts.sign != R128ToStringSign_Plus) {
  1285. opts.sign = R128ToStringSign_Space;
  1286. } else if (*format == '+') {
  1287. opts.sign = R128ToStringSign_Plus;
  1288. } else if (*format == '0') {
  1289. opts.zeroPad = 1;
  1290. } else if (*format == '-') {
  1291. opts.leftAlign = 1;
  1292. } else if (*format == '#') {
  1293. opts.decimal = 1;
  1294. } else {
  1295. break;
  1296. }
  1297. }
  1298. // width field
  1299. opts.width = 0;
  1300. for (;;) {
  1301. if ('0' <= *format && *format <= '9') {
  1302. opts.width = opts.width * 10 + *format++ - '0';
  1303. } else {
  1304. break;
  1305. }
  1306. }
  1307. // precision field
  1308. if (*format == '.') {
  1309. opts.precision = 0;
  1310. ++format;
  1311. for (;;) {
  1312. if ('0' <= *format && *format <= '9') {
  1313. opts.precision = opts.precision * 10 + *format++ - '0';
  1314. } else {
  1315. break;
  1316. }
  1317. }
  1318. }
  1319. return r128__format(dst, dstSize, v, &opts);
  1320. }
  1321. int r128ToString(char *dst, size_t dstSize, const R128 *v)
  1322. {
  1323. return r128__format(dst, dstSize, v, &R128__defaultFormat);
  1324. }
  1325. void r128Copy(R128 *dst, const R128 *src)
  1326. {
  1327. R128_ASSERT(dst != NULL);
  1328. R128_ASSERT(src != NULL);
  1329. dst->lo = src->lo;
  1330. dst->hi = src->hi;
  1331. R128_DEBUG_SET(dst);
  1332. }
  1333. void r128Neg(R128 *dst, const R128 *v)
  1334. {
  1335. r128__neg(dst, v);
  1336. R128_DEBUG_SET(dst);
  1337. }
  1338. void r128Abs(R128* dst, const R128* v)
  1339. {
  1340. R128 sign, inv;
  1341. R128_ASSERT(dst != NULL);
  1342. R128_ASSERT(v != NULL);
  1343. sign.lo = sign.hi = (R128_U64)(((R128_S64)v->hi) >> 63);
  1344. inv.lo = v->lo ^ sign.lo;
  1345. inv.hi = v->hi ^ sign.hi;
  1346. r128Sub(dst, &inv, &sign);
  1347. }
  1348. void r128Nabs(R128* dst, const R128* v)
  1349. {
  1350. R128 sign, inv;
  1351. R128_ASSERT(dst != NULL);
  1352. R128_ASSERT(v != NULL);
  1353. sign.lo = sign.hi = (R128_U64)(((R128_S64)v->hi) >> 63);
  1354. inv.lo = v->lo ^ sign.lo;
  1355. inv.hi = v->hi ^ sign.hi;
  1356. r128Sub(dst, &sign, &inv);
  1357. }
  1358. void r128Not(R128 *dst, const R128 *src)
  1359. {
  1360. R128_ASSERT(dst != NULL);
  1361. R128_ASSERT(src != NULL);
  1362. dst->lo = ~src->lo;
  1363. dst->hi = ~src->hi;
  1364. R128_DEBUG_SET(dst);
  1365. }
  1366. void r128Or(R128 *dst, const R128 *a, const R128 *b)
  1367. {
  1368. R128_ASSERT(dst != NULL);
  1369. R128_ASSERT(a != NULL);
  1370. R128_ASSERT(b != NULL);
  1371. dst->lo = a->lo | b->lo;
  1372. dst->hi = a->hi | b->hi;
  1373. R128_DEBUG_SET(dst);
  1374. }
  1375. void r128And(R128 *dst, const R128 *a, const R128 *b)
  1376. {
  1377. R128_ASSERT(dst != NULL);
  1378. R128_ASSERT(a != NULL);
  1379. R128_ASSERT(b != NULL);
  1380. dst->lo = a->lo & b->lo;
  1381. dst->hi = a->hi & b->hi;
  1382. R128_DEBUG_SET(dst);
  1383. }
  1384. void r128Xor(R128 *dst, const R128 *a, const R128 *b)
  1385. {
  1386. R128_ASSERT(dst != NULL);
  1387. R128_ASSERT(a != NULL);
  1388. R128_ASSERT(b != NULL);
  1389. dst->lo = a->lo ^ b->lo;
  1390. dst->hi = a->hi ^ b->hi;
  1391. R128_DEBUG_SET(dst);
  1392. }
  1393. void r128Shl(R128 *dst, const R128 *src, int amount)
  1394. {
  1395. R128_U64 r[4];
  1396. R128_ASSERT(dst != NULL);
  1397. R128_ASSERT(src != NULL);
  1398. #if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
  1399. __asm {
  1400. // load src
  1401. mov edx, dword ptr[src]
  1402. mov ecx, amount
  1403. mov edi, dword ptr[edx]
  1404. mov esi, dword ptr[edx + 4]
  1405. mov ebx, dword ptr[edx + 8]
  1406. mov eax, dword ptr[edx + 12]
  1407. // shift mod 32
  1408. shld eax, ebx, cl
  1409. shld ebx, esi, cl
  1410. shld esi, edi, cl
  1411. shl edi, cl
  1412. // clear out low 12 bytes of stack
  1413. xor edx, edx
  1414. mov dword ptr[r], edx
  1415. mov dword ptr[r + 4], edx
  1416. mov dword ptr[r + 8], edx
  1417. // store shifted amount offset by count/32 bits
  1418. shr ecx, 5
  1419. and ecx, 3
  1420. mov dword ptr[r + ecx * 4 + 0], edi
  1421. mov dword ptr[r + ecx * 4 + 4], esi
  1422. mov dword ptr[r + ecx * 4 + 8], ebx
  1423. mov dword ptr[r + ecx * 4 + 12], eax
  1424. }
  1425. #else
  1426. r[0] = src->lo;
  1427. r[1] = src->hi;
  1428. amount &= 127;
  1429. if (amount >= 64) {
  1430. r[1] = r[0] << (amount - 64);
  1431. r[0] = 0;
  1432. } else if (amount) {
  1433. # if defined(_M_X64) && !defined(R128_STDC_ONLY)
  1434. r[1] = __shiftleft128(r[0], r[1], (char) amount);
  1435. # else
  1436. r[1] = (r[1] << amount) | (r[0] >> (64 - amount));
  1437. # endif
  1438. r[0] = r[0] << amount;
  1439. }
  1440. #endif //_M_IX86
  1441. dst->lo = r[0];
  1442. dst->hi = r[1];
  1443. R128_DEBUG_SET(dst);
  1444. }
void r128Shr(R128 *dst, const R128 *src, int amount)
{
   R128_U64 r[4];

   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   __asm {
      // load src
      mov edx, dword ptr[src]
      mov ecx, amount

      mov edi, dword ptr[edx]
      mov esi, dword ptr[edx + 4]
      mov ebx, dword ptr[edx + 8]
      mov eax, dword ptr[edx + 12]

      // shift mod 32
      shrd edi, esi, cl
      shrd esi, ebx, cl
      shrd ebx, eax, cl
      shr eax, cl

      // clear out high 12 bytes of stack
      xor edx, edx
      mov dword ptr[r + 20], edx
      mov dword ptr[r + 24], edx
      mov dword ptr[r + 28], edx

      // store shifted amount offset by -count/32 bits
      shr ecx, 5
      and ecx, 3
      neg ecx
      mov dword ptr[r + ecx * 4 + 16], edi
      mov dword ptr[r + ecx * 4 + 20], esi
      mov dword ptr[r + ecx * 4 + 24], ebx
      mov dword ptr[r + ecx * 4 + 28], eax
   }
#else
   r[2] = src->lo;
   r[3] = src->hi;

   amount &= 127;
   if (amount >= 64) {
      r[2] = r[3] >> (amount - 64);
      r[3] = 0;
   } else if (amount) {
# if defined(_M_X64) && !defined(R128_STDC_ONLY)
      r[2] = __shiftright128(r[2], r[3], (char) amount);
# else
      r[2] = (r[2] >> amount) | (r[3] << (64 - amount));
# endif
      r[3] = r[3] >> amount;
   }
#endif

   dst->lo = r[2];
   dst->hi = r[3];
   R128_DEBUG_SET(dst);
}

void r128Sar(R128 *dst, const R128 *src, int amount)
{
   R128_U64 r[4];

   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   __asm {
      // load src
      mov edx, dword ptr[src]
      mov ecx, amount

      mov edi, dword ptr[edx]
      mov esi, dword ptr[edx + 4]
      mov ebx, dword ptr[edx + 8]
      mov eax, dword ptr[edx + 12]

      // shift mod 32
      shrd edi, esi, cl
      shrd esi, ebx, cl
      shrd ebx, eax, cl
      sar eax, cl

      // copy sign to high 12 bytes of stack
      cdq
      mov dword ptr[r + 20], edx
      mov dword ptr[r + 24], edx
      mov dword ptr[r + 28], edx

      // store shifted amount offset by -count/32 bits
      shr ecx, 5
      and ecx, 3
      neg ecx
      mov dword ptr[r + ecx * 4 + 16], edi
      mov dword ptr[r + ecx * 4 + 20], esi
      mov dword ptr[r + ecx * 4 + 24], ebx
      mov dword ptr[r + ecx * 4 + 28], eax
   }
#else
   r[2] = src->lo;
   r[3] = src->hi;

   amount &= 127;
   if (amount >= 64) {
      r[2] = (R128_U64)((R128_S64)r[3] >> (amount - 64));
      r[3] = (R128_U64)((R128_S64)r[3] >> 63);
   } else if (amount) {
      r[2] = (r[2] >> amount) | (R128_U64)((R128_S64)r[3] << (64 - amount));
      r[3] = (R128_U64)((R128_S64)r[3] >> amount);
   }
#endif

   dst->lo = r[2];
   dst->hi = r[3];
   R128_DEBUG_SET(dst);
}

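// r128Add and r128Sub are plain 128-bit two's complement add/subtract. On
// Intel targets they chain the carry/borrow across words with the
// _addcarry/_subborrow intrinsics; the portable fallback detects the carry
// out of the low word with an unsigned comparison (r < a->lo after the add,
// r > a->lo after the subtract) and folds it into the high word.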
void r128Add(R128 *dst, const R128 *a, const R128 *b)
{
   unsigned char carry = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

#if R128_INTEL && !defined(R128_STDC_ONLY)
# if R128_64BIT
   carry = _addcarry_u64(carry, a->lo, b->lo, &dst->lo);
   carry = _addcarry_u64(carry, a->hi, b->hi, &dst->hi);
# else
   R128_U32 r0, r1, r2, r3;
   carry = _addcarry_u32(carry, R128_R0(a), R128_R0(b), &r0);
   carry = _addcarry_u32(carry, R128_R1(a), R128_R1(b), &r1);
   carry = _addcarry_u32(carry, R128_R2(a), R128_R2(b), &r2);
   carry = _addcarry_u32(carry, R128_R3(a), R128_R3(b), &r3);
   R128_SET4(dst, r0, r1, r2, r3);
# endif //R128_64BIT
#else
   {
      R128_U64 r = a->lo + b->lo;
      carry = r < a->lo;
      dst->lo = r;
      dst->hi = a->hi + b->hi + carry;
   }
#endif //R128_INTEL

   R128_DEBUG_SET(dst);
}

void r128Sub(R128 *dst, const R128 *a, const R128 *b)
{
   unsigned char borrow = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

#if R128_INTEL && !defined(R128_STDC_ONLY)
# if R128_64BIT
   borrow = _subborrow_u64(borrow, a->lo, b->lo, &dst->lo);
   borrow = _subborrow_u64(borrow, a->hi, b->hi, &dst->hi);
# else
   R128_U32 r0, r1, r2, r3;
   borrow = _subborrow_u32(borrow, R128_R0(a), R128_R0(b), &r0);
   borrow = _subborrow_u32(borrow, R128_R1(a), R128_R1(b), &r1);
   borrow = _subborrow_u32(borrow, R128_R2(a), R128_R2(b), &r2);
   borrow = _subborrow_u32(borrow, R128_R3(a), R128_R3(b), &r3);
   R128_SET4(dst, r0, r1, r2, r3);
# endif //R128_64BIT
#else
   {
      R128_U64 r = a->lo - b->lo;
      borrow = r > a->lo;
      dst->lo = r;
      dst->hi = a->hi - b->hi - borrow;
   }
#endif //R128_INTEL

   R128_DEBUG_SET(dst);
}

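// Signed multiply implemented as sign-and-magnitude on top of the unsigned
// helper r128__umul: each negative operand is negated and flips the result
// sign, the magnitudes are multiplied, and the product is negated back if
// exactly one operand was negative.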
void r128Mul(R128 *dst, const R128 *a, const R128 *b)
{
   int sign = 0;
   R128 ta, tb, tc;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&ta, a->lo, a->hi);
   R128_SET2(&tb, b->lo, b->hi);

   if (r128IsNeg(&ta)) {
      r128__neg(&ta, &ta);
      sign = !sign;
   }
   if (r128IsNeg(&tb)) {
      r128__neg(&tb, &tb);
      sign = !sign;
   }

   r128__umul(&tc, &ta, &tb);
   if (sign) {
      r128__neg(&tc, &tc);
   }
   r128Copy(dst, &tc);
}

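// Signed divide using the same sign-and-magnitude scheme as r128Mul, built
// on the unsigned helper r128__udiv. Division by zero does not trap: it
// saturates to R128_min when the numerator is negative and to R128_max
// otherwise.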
void r128Div(R128 *dst, const R128 *a, const R128 *b)
{
   int sign = 0;
   R128 tn, td, tq;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&tn, a->lo, a->hi);
   R128_SET2(&td, b->lo, b->hi);

   if (r128IsNeg(&tn)) {
      r128__neg(&tn, &tn);
      sign = !sign;
   }

   if (td.lo == 0 && td.hi == 0) {
      // divide by zero
      if (sign) {
         r128Copy(dst, &R128_min);
      } else {
         r128Copy(dst, &R128_max);
      }
      return;
   } else if (r128IsNeg(&td)) {
      r128__neg(&td, &td);
      sign = !sign;
   }

   r128__udiv(&tq, &tn, &td);

   if (sign) {
      r128__neg(&tq, &tq);
   }

   r128Copy(dst, &tq);
}

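// Remainder with truncated-division semantics: the whole-number quotient of
// |a| / |b| (as produced by r128__umod, stored in the .hi word so it is an
// integer in this fixed-point format) is given the sign of the exact
// quotient, and the result is computed as dst = a - trunc(a / b) * b, so the
// remainder takes the sign of the dividend. Division by zero saturates to
// R128_min or R128_max, as in r128Div.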
void r128Mod(R128 *dst, const R128 *a, const R128 *b)
{
   int sign = 0;
   R128 tn, td, tq;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&tn, a->lo, a->hi);
   R128_SET2(&td, b->lo, b->hi);

   if (r128IsNeg(&tn)) {
      r128__neg(&tn, &tn);
      sign = !sign;
   }

   if (td.lo == 0 && td.hi == 0) {
      // divide by zero
      if (sign) {
         r128Copy(dst, &R128_min);
      } else {
         r128Copy(dst, &R128_max);
      }
      return;
   } else if (r128IsNeg(&td)) {
      r128__neg(&td, &td);
      sign = !sign;
   }

   tq.hi = r128__umod(&tn, &td);
   tq.lo = 0;

   if (sign) {
      tq.hi = ~tq.hi + 1;
   }

   r128Mul(&tq, &tq, b);
   r128Sub(dst, a, &tq);
}

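// Reciprocal square root via Newton-Raphson on f(y) = 1/y^2 - v, using the
// recurrence y' = y * (3/2 - (v/2) * y^2); the x /= 2 step below supplies
// the (v/2) term. The initial estimate is a power of two chosen from the
// position of the highest set bit, and the loop runs at most 7 iterations
// or until the estimate stops changing. Negative inputs return R128_min;
// zero returns zero.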
void r128Rsqrt(R128 *dst, const R128 *v)
{
   static const R128 threeHalves = { R128_LIT_U64(0x8000000000000000), 1 };
   R128 x, est;
   int i;

   if ((R128_S64)v->hi < 0) {
      r128Copy(dst, &R128_min);
      return;
   }

   R128_SET2(&x, v->lo, v->hi);

   // get initial estimate
   if (x.hi) {
      int shift = (64 + r128__clz64(x.hi)) >> 1;
      est.lo = R128_LIT_U64(1) << shift;
      est.hi = 0;
   } else if (x.lo) {
      int shift = r128__clz64(x.lo) >> 1;
      est.hi = R128_LIT_U64(1) << shift;
      est.lo = 0;
   } else {
      R128_SET2(dst, 0, 0);
      return;
   }

   // x /= 2
   r128Shr(&x, &x, 1);

   // Newton-Raphson iterate
   for (i = 0; i < 7; ++i) {
      R128 newEst;

      // newEst = est * (threeHalves - (x / 2) * est * est);
      r128__umul(&newEst, &est, &est);
      r128__umul(&newEst, &newEst, &x);
      r128Sub(&newEst, &threeHalves, &newEst);
      r128__umul(&newEst, &est, &newEst);

      if (newEst.lo == est.lo && newEst.hi == est.hi) {
         break;
      }

      R128_SET2(&est, newEst.lo, newEst.hi);
   }

   r128Copy(dst, &est);
}

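// Square root via the classic Heron / Newton-Raphson iteration
// y' = (y + v / y) / 2, seeded with v shifted by roughly half its exponent
// so the first estimate already lands near sqrt(v). As with r128Rsqrt,
// negative inputs return R128_min, zero returns zero, and the loop stops
// after 7 iterations or on convergence.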
void r128Sqrt(R128 *dst, const R128 *v)
{
   R128 x, est;
   int i;

   if ((R128_S64)v->hi < 0) {
      r128Copy(dst, &R128_min);
      return;
   }

   R128_SET2(&x, v->lo, v->hi);

   // get initial estimate
   if (x.hi) {
      int shift = (63 - r128__clz64(x.hi)) >> 1;
      r128Shr(&est, &x, shift);
   } else if (x.lo) {
      int shift = (1 + r128__clz64(x.lo)) >> 1;
      r128Shl(&est, &x, shift);
   } else {
      R128_SET2(dst, 0, 0);
      return;
   }

   // Newton-Raphson iterate
   for (i = 0; i < 7; ++i) {
      R128 newEst;

      // newEst = (est + x / est) / 2
      r128__udiv(&newEst, &x, &est);
      r128Add(&newEst, &newEst, &est);
      r128Shr(&newEst, &newEst, 1);

      if (newEst.lo == est.lo && newEst.hi == est.hi) {
         break;
      }

      R128_SET2(&est, newEst.lo, newEst.hi);
   }

   r128Copy(dst, &est);
}

int r128Cmp(const R128 *a, const R128 *b)
{
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   if (a->hi == b->hi) {
      if (a->lo == b->lo) {
         return 0;
      } else if (a->lo > b->lo) {
         return 1;
      } else {
         return -1;
      }
   } else if ((R128_S64)a->hi > (R128_S64)b->hi) {
      return 1;
   } else {
      return -1;
   }
}

int r128IsNeg(const R128 *v)
{
   R128_ASSERT(v != NULL);

   return (R128_S64)v->hi < 0;
}

void r128Min(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   if (r128Cmp(a, b) < 0) {
      r128Copy(dst, a);
   } else {
      r128Copy(dst, b);
   }
}

void r128Max(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   if (r128Cmp(a, b) > 0) {
      r128Copy(dst, a);
   } else {
      r128Copy(dst, b);
   }
}

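// In this 64.64 fixed-point layout, hi holds the signed integer part and lo
// holds the 64-bit fraction, so floor simply clears the fraction, ceil bumps
// the integer part whenever any fraction bits are set, and round bumps it
// when the fraction is at least one half (with a one-ulp adjustment of the
// threshold for negative values so that exact halves round away from zero).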
void r128Floor(R128 *dst, const R128 *v)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   dst->hi = v->hi;
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}

void r128Ceil(R128 *dst, const R128 *v)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   dst->hi = v->hi + (v->lo != 0);
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}

void r128Round(R128 *dst, const R128 *v)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   dst->hi = v->hi + (v->lo >= R128_LIT_U64(0x8000000000000000) + (R128_U64)((R128_S64)v->hi < 0));
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}

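// Illustrative usage sketch (kept as a comment; it is not part of the
// library). Values are built directly from the { lo, hi } layout used above,
// where hi is the integer part and lo the 64-bit fraction:
//
//    R128 two = { 0, 2 };            // 2.0 in 64.64 fixed point
//    R128 root, scaled;
//
//    r128Sqrt(&root, &two);          // root ~= 1.41421356...
//    // root.hi == 1 and root.lo is roughly 0x6A09E667F3BCC908,
//    // i.e. frac(sqrt(2)) scaled by 2^64 (possibly off in the last bits).
//
//    r128Shl(&scaled, &root, 1);     // multiply by 2: ~2.828427...
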
#endif //R128_IMPLEMENTATION