mpint_i.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. /*
  2. * mpint_i.h: definitions used internally by the bignum code, and
  3. * also a few other vaguely-bignum-like places.
  4. */
  5. /* ----------------------------------------------------------------------
  6. * The assorted conditional definitions of BignumInt and multiply
  7. * macros used throughout the bignum code to treat numbers as arrays
  8. * of the most conveniently sized word for the target machine.
  9. * Exported so that other code (e.g. poly1305) can use it too.
  10. *
  11. * This code must export, in whatever ifdef branch it ends up in:
  12. *
  13. * - two types: 'BignumInt' and 'BignumCarry'. BignumInt is an
  14. * unsigned integer type which will be used as the base word size
  15. * for all bignum operations. BignumCarry is an unsigned integer
  16. * type used to hold the carry flag taken as input and output by
  17. * the BignumADC macro (see below).
  18. *
  19. * - five constant macros:
  20. * + BIGNUM_INT_BITS, the number of bits in BignumInt,
  21. * + BIGNUM_INT_BYTES, the number of bytes that works out to
  22. * + BIGNUM_TOP_BIT, the BignumInt value consisting of only the top bit
  23. * + BIGNUM_INT_MASK, the BignumInt value with all bits set
  24. * + BIGNUM_INT_BITS_BITS, log to the base 2 of BIGNUM_INT_BITS.
  25. *
  26. * - four statement macros: BignumADC, BignumMUL, BignumMULADD,
  27. * BignumMULADD2. These do various kinds of multi-word arithmetic,
  28. * and all produce two output values.
  29. * * BignumADC(ret,retc,a,b,c) takes input BignumInt values a,b
  30. * and a BignumCarry c, and outputs a BignumInt ret = a+b+c and
  31. * a BignumCarry retc which is the carry off the top of that
  32. * addition.
  33. * * BignumMUL(rh,rl,a,b) returns the two halves of the
  34. * double-width product a*b.
  35. * * BignumMULADD(rh,rl,a,b,addend) returns the two halves of the
  36. * double-width value a*b + addend.
  37. * * BignumMULADD2(rh,rl,a,b,addend1,addend2) returns the two
  38. * halves of the double-width value a*b + addend1 + addend2.
  39. *
  40. * Every branch of the main ifdef below defines the type BignumInt and
  41. * the value BIGNUM_INT_BITS_BITS. The other constant macros are
  42. * filled in by common code further down.
  43. *
  44. * Most branches also define a macro DEFINE_BIGNUMDBLINT containing a
  45. * typedef statement which declares a type _twice_ the length of a
  46. * BignumInt. This causes the common code further down to produce a
  47. * default implementation of the four statement macros in terms of
  48. * that double-width type, and also to defined BignumCarry to be
  49. * BignumInt.
  50. *
  51. * However, if a particular compile target does not have a type twice
  52. * the length of the BignumInt you want to use but it does provide
  53. * some alternative means of doing add-with-carry and double-word
  54. * multiply, then the ifdef branch in question can just define
  55. * BignumCarry and the four statement macros itself, and that's fine
  56. * too.
  57. */
  58. /* You can lower the BignumInt size by defining BIGNUM_OVERRIDE on the
  59. * command line to be your chosen max value of BIGNUM_INT_BITS_BITS */
  60. #if defined BIGNUM_OVERRIDE
  61. #define BB_OK(b) ((b) <= BIGNUM_OVERRIDE)
  62. #else
  63. #define BB_OK(b) (1)
  64. #endif
  65. #if defined __SIZEOF_INT128__ && BB_OK(6)
  66. /*
  67. * 64-bit BignumInt using gcc/clang style 128-bit BignumDblInt.
  68. *
  69. * gcc and clang both provide a __uint128_t type on 64-bit targets
  70. * (and, when they do, indicate its presence by the above macro),
  71. * using the same 'two machine registers' kind of code generation
  72. * that 32-bit targets use for 64-bit ints.
  73. */
  74. typedef unsigned long long BignumInt;
  75. #define BIGNUM_INT_BITS_BITS 6
  76. #define DEFINE_BIGNUMDBLINT typedef __uint128_t BignumDblInt
  77. #elif defined _MSC_VER && defined _M_AMD64 && BB_OK(6)
  78. /*
  79. * 64-bit BignumInt, using Visual Studio x86-64 compiler intrinsics.
  80. *
  81. * 64-bit Visual Studio doesn't provide very much in the way of help
  82. * here: there's no int128 type, and also no inline assembler giving
  83. * us direct access to the x86-64 MUL or ADC instructions. However,
  84. * there are compiler intrinsics giving us that access, so we can
  85. * use those - though it turns out we have to be a little careful,
  86. * since they seem to generate wrong code if their pointer-typed
  87. * output parameters alias their inputs. Hence all the internal temp
  88. * variables inside the macros.
  89. */
  90. #include <intrin.h>
  91. typedef unsigned char BignumCarry; /* the type _addcarry_u64 likes to use */
  92. typedef unsigned __int64 BignumInt;
  93. #define BIGNUM_INT_BITS_BITS 6
  94. #define BignumADC(ret, retc, a, b, c) do \
  95. { \
  96. BignumInt ADC_tmp; \
  97. (retc) = _addcarry_u64(c, a, b, &ADC_tmp); \
  98. (ret) = ADC_tmp; \
  99. } while (0)
  100. #define BignumMUL(rh, rl, a, b) do \
  101. { \
  102. BignumInt MULADD_hi; \
  103. (rl) = _umul128(a, b, &MULADD_hi); \
  104. (rh) = MULADD_hi; \
  105. } while (0)
  106. #define BignumMULADD(rh, rl, a, b, addend) do \
  107. { \
  108. BignumInt MULADD_lo, MULADD_hi; \
  109. MULADD_lo = _umul128(a, b, &MULADD_hi); \
  110. MULADD_hi += _addcarry_u64(0, MULADD_lo, (addend), &(rl)); \
  111. (rh) = MULADD_hi; \
  112. } while (0)
  113. #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do \
  114. { \
  115. BignumInt MULADD_lo1, MULADD_lo2, MULADD_hi; \
  116. MULADD_lo1 = _umul128(a, b, &MULADD_hi); \
  117. MULADD_hi += _addcarry_u64(0, MULADD_lo1, (addend1), &MULADD_lo2); \
  118. MULADD_hi += _addcarry_u64(0, MULADD_lo2, (addend2), &(rl)); \
  119. (rh) = MULADD_hi; \
  120. } while (0)
  121. #elif (defined __GNUC__ || defined _LLP64 || __STDC__ >= 199901L) && BB_OK(5)
  122. /* 32-bit BignumInt, using C99 unsigned long long as BignumDblInt */
  123. typedef unsigned int BignumInt;
  124. #define BIGNUM_INT_BITS_BITS 5
  125. #define DEFINE_BIGNUMDBLINT typedef unsigned long long BignumDblInt
  126. #elif defined _MSC_VER && BB_OK(5)
  127. /* 32-bit BignumInt, using Visual Studio __int64 as BignumDblInt */
  128. typedef unsigned int BignumInt;
  129. #define BIGNUM_INT_BITS_BITS 5
  130. #define DEFINE_BIGNUMDBLINT typedef unsigned __int64 BignumDblInt
  131. #elif defined _LP64 && BB_OK(5)
  132. /*
  133. * 32-bit BignumInt, using unsigned long itself as BignumDblInt.
  134. *
  135. * Only for platforms where long is 64 bits, of course.
  136. */
  137. typedef unsigned int BignumInt;
  138. #define BIGNUM_INT_BITS_BITS 5
  139. #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
  140. #elif BB_OK(4)
  141. /*
  142. * 16-bit BignumInt, using unsigned long as BignumDblInt.
  143. *
  144. * This is the final fallback for real emergencies: C89 guarantees
  145. * unsigned short/long to be at least the required sizes, so this
  146. * should work on any C implementation at all. But it'll be
  147. * noticeably slow, so if you find yourself in this case you
  148. * probably want to move heaven and earth to find an alternative!
  149. */
  150. typedef unsigned short BignumInt;
  151. #define BIGNUM_INT_BITS_BITS 4
  152. #define DEFINE_BIGNUMDBLINT typedef unsigned long BignumDblInt
  153. #else
  154. /* Should only get here if BB_OK(4) evaluated false, i.e. the
  155. * command line defined BIGNUM_OVERRIDE to an absurdly small
  156. * value. */
  157. #error Must define BIGNUM_OVERRIDE to at least 4
  158. #endif
  159. #undef BB_OK
  160. /*
  161. * Common code across all branches of that ifdef: define all the
  162. * easy constant macros in terms of BIGNUM_INT_BITS_BITS.
  163. */
  164. #define BIGNUM_INT_BITS (1 << BIGNUM_INT_BITS_BITS)
  165. #define BIGNUM_INT_BYTES (BIGNUM_INT_BITS / 8)
  166. #define BIGNUM_TOP_BIT (((BignumInt)1) << (BIGNUM_INT_BITS-1))
  167. #define BIGNUM_INT_MASK (BIGNUM_TOP_BIT | (BIGNUM_TOP_BIT-1))
  168. /*
  169. * Just occasionally, we might need a GET_nnBIT_xSB_FIRST macro to
  170. * operate on whatever BignumInt is.
  171. */
  172. #if BIGNUM_INT_BITS_BITS == 4
  173. #define GET_BIGNUMINT_MSB_FIRST GET_16BIT_MSB_FIRST
  174. #define GET_BIGNUMINT_LSB_FIRST GET_16BIT_LSB_FIRST
  175. #define PUT_BIGNUMINT_MSB_FIRST PUT_16BIT_MSB_FIRST
  176. #define PUT_BIGNUMINT_LSB_FIRST PUT_16BIT_LSB_FIRST
  177. #elif BIGNUM_INT_BITS_BITS == 5
  178. #define GET_BIGNUMINT_MSB_FIRST GET_32BIT_MSB_FIRST
  179. #define GET_BIGNUMINT_LSB_FIRST GET_32BIT_LSB_FIRST
  180. #define PUT_BIGNUMINT_MSB_FIRST PUT_32BIT_MSB_FIRST
  181. #define PUT_BIGNUMINT_LSB_FIRST PUT_32BIT_LSB_FIRST
  182. #elif BIGNUM_INT_BITS_BITS == 6
  183. #define GET_BIGNUMINT_MSB_FIRST GET_64BIT_MSB_FIRST
  184. #define GET_BIGNUMINT_LSB_FIRST GET_64BIT_LSB_FIRST
  185. #define PUT_BIGNUMINT_MSB_FIRST PUT_64BIT_MSB_FIRST
  186. #define PUT_BIGNUMINT_LSB_FIRST PUT_64BIT_LSB_FIRST
  187. #else
  188. #error Ran out of options for GET_BIGNUMINT_xSB_FIRST
  189. #endif
  190. /*
  191. * Common code across _most_ branches of the ifdef: define a set of
  192. * statement macros in terms of the BignumDblInt type provided. In
  193. * this case, we also define BignumCarry to be the same thing as
  194. * BignumInt, for simplicity.
  195. */
  196. #ifdef DEFINE_BIGNUMDBLINT
  197. typedef BignumInt BignumCarry;
  198. #define BignumADC(ret, retc, a, b, c) do \
  199. { \
  200. DEFINE_BIGNUMDBLINT; \
  201. BignumDblInt ADC_temp = (BignumInt)(a); \
  202. ADC_temp += (BignumInt)(b); \
  203. ADC_temp += (c); \
  204. (ret) = (BignumInt)ADC_temp; \
  205. (retc) = (BignumCarry)(ADC_temp >> BIGNUM_INT_BITS); \
  206. } while (0)
  207. #define BignumMUL(rh, rl, a, b) do \
  208. { \
  209. DEFINE_BIGNUMDBLINT; \
  210. BignumDblInt MUL_temp = (BignumInt)(a); \
  211. MUL_temp *= (BignumInt)(b); \
  212. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  213. (rl) = (BignumInt)(MUL_temp); \
  214. } while (0)
  215. #define BignumMULADD(rh, rl, a, b, addend) do \
  216. { \
  217. DEFINE_BIGNUMDBLINT; \
  218. BignumDblInt MUL_temp = (BignumInt)(a); \
  219. MUL_temp *= (BignumInt)(b); \
  220. MUL_temp += (BignumInt)(addend); \
  221. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  222. (rl) = (BignumInt)(MUL_temp); \
  223. } while (0)
  224. #define BignumMULADD2(rh, rl, a, b, addend1, addend2) do \
  225. { \
  226. DEFINE_BIGNUMDBLINT; \
  227. BignumDblInt MUL_temp = (BignumInt)(a); \
  228. MUL_temp *= (BignumInt)(b); \
  229. MUL_temp += (BignumInt)(addend1); \
  230. MUL_temp += (BignumInt)(addend2); \
  231. (rh) = (BignumInt)(MUL_temp >> BIGNUM_INT_BITS); \
  232. (rl) = (BignumInt)(MUL_temp); \
  233. } while (0)
  234. #endif /* DEFINE_BIGNUMDBLINT */
  235. /* ----------------------------------------------------------------------
  236. * Data structures used inside mpint.c.
  237. */
  238. struct mp_int {
  239. size_t nw;
  240. BignumInt *w;
  241. };
  242. struct MontyContext {
  243. /*
  244. * The actual modulus.
  245. */
  246. mp_int *m;
  247. /*
  248. * Montgomery multiplication works by selecting a value r > m,
  249. * coprime to m, which is really easy to divide by. In binary
  250. * arithmetic, that means making it a power of 2; in fact we make
  251. * it a whole number of BignumInt.
  252. *
  253. * We don't store r directly as an mp_int (there's no need). But
  254. * its value is 2^rbits; we also store rw = rbits/BIGNUM_INT_BITS
  255. * (the corresponding word offset within an mp_int).
  256. *
  257. * pw is the number of words needed to store an mp_int you're
  258. * doing reduction on: it has to be big enough to hold the sum of
  259. * an input value up to m^2 plus an extra addend up to m*r.
  260. */
  261. size_t rbits, rw, pw;
  262. /*
  263. * The key step in Montgomery reduction requires the inverse of -m
  264. * mod r.
  265. */
  266. mp_int *minus_minv_mod_r;
  267. /*
  268. * r^1, r^2 and r^3 mod m, which are used for various purposes.
  269. *
  270. * (Annoyingly, this is one of the rare cases where it would have
  271. * been nicer to have a Pascal-style 1-indexed array. I couldn't
  272. * _quite_ bring myself to put a gratuitous zero element in here.
  273. * So you just have to live with getting r^k by taking the [k-1]th
  274. * element of this array.)
  275. */
  276. mp_int *powers_of_r_mod_m[3];
  277. /*
  278. * Persistent scratch space from which monty_* functions can
  279. * allocate storage for intermediate values.
  280. */
  281. mp_int *scratch;
  282. };
  283. /* Functions shared between mpint.c and mpunsafe.c */
  284. mp_int *mp_make_sized(size_t nw);