/* mpihelp-div.c - MPI helper functions
 * Copyright (C) 1994, 1996 Free Software Foundation, Inc.
 * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * GnuPG is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GnuPG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * Note: This code is heavily based on the GNU MP Library.
 * Actually it's the same code with only minor changes in the
 * way the data is stored; this is to support the abstraction
 * of an optional secure memory allocation which may be used
 * to avoid revealing sensitive data due to paging etc.
 * The GNU MP Library itself is published under the LGPL;
 * however I decided to publish this code under the plain GPL.
 */
#include "mpi-internal.h"
#include "longlong.h"

#ifndef UMUL_TIME
#define UMUL_TIME 1
#endif
#ifndef UDIV_TIME
#define UDIV_TIME UMUL_TIME
#endif
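
/* Note on the tuning constants: UMUL_TIME and UDIV_TIME approximate the
 * relative cycle costs of umul_ppmm and udiv_qrnnd on the target CPU and
 * may be overridden per architecture.  With the defaults above
 * (UDIV_TIME == UMUL_TIME) the pre-inversion fast paths below are never
 * taken, which is the conservative choice.
 */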

/* FIXME: We should be using invert_limb (or invert_normalized_limb)
 * here (not udiv_qrnnd).
 */

mpi_limb_t
mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
              mpi_limb_t divisor_limb)
{
        mpi_size_t i;
        mpi_limb_t n1, n0, r;
        int dummy;

        /* Botch: Should this be handled at all?  Rely on callers? */
        if (!dividend_size)
                return 0;

        /* If multiplication is much faster than division, and the
         * dividend is large, pre-invert the divisor, and use
         * only multiplications in the inner loop.
         *
         * This test should be read:
         * Does it ever help to use udiv_qrnnd_preinv?
         * && Does what we save compensate for the inversion overhead?
         */
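        /* In cost terms: with a pre-inverted divisor each limb costs about
         * 2*UMUL_TIME + 6, while plain division costs UDIV_TIME per limb
         * and the one-time inversion costs about one udiv_qrnnd, i.e.
         * UDIV_TIME.  Pre-inversion therefore wins when
         *   n*UDIV_TIME > n*(2*UMUL_TIME + 6) + UDIV_TIME,
         * which rearranges to exactly the condition tested below.
         */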
        if (UDIV_TIME > (2 * UMUL_TIME + 6)
            && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
                int normalization_steps;

                count_leading_zeros(normalization_steps, divisor_limb);
                if (normalization_steps) {
                        mpi_limb_t divisor_limb_inverted;

                        divisor_limb <<= normalization_steps;

                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.
                         * The result is a (N+1)-bit approximation to
                         * 1/DIVISOR_LIMB, with the most significant bit (with
                         * weight 2**N) implicit.
                         *
                         * Special case for DIVISOR_LIMB == 100...000.
                         */
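                        /* Worked example, assuming a hypothetical 4-bit limb
                         * (N = 4): for the normalized divisor d = 0b1100 = 12,
                         * (2**8 - 2**4 * 12) / 12 = 64 / 12 = 5 = 0b0101, and
                         * with the implicit leading bit the inverse is
                         * 0b10101 = 21, i.e. roughly 2**8 / 12.
                         */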
                        if (!(divisor_limb << 1))
                                divisor_limb_inverted = ~(mpi_limb_t) 0;
                        else
                                udiv_qrnnd(divisor_limb_inverted, dummy,
                                           -divisor_limb, 0, divisor_limb);

                        n1 = dividend_ptr[dividend_size - 1];
                        r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

                        /* Possible optimization:
                         * if (r == 0
                         *     && divisor_limb > ((n1 << normalization_steps)
                         *         | (dividend_ptr[dividend_size - 2] >> ...)))
                         * ...one division less...
                         */
                        for (i = dividend_size - 2; i >= 0; i--) {
                                n0 = dividend_ptr[i];
                                UDIV_QRNND_PREINV(dummy, r, r,
                                                  ((n1 << normalization_steps)
                                                   | (n0 >> (BITS_PER_MPI_LIMB
                                                             - normalization_steps))),
                                                  divisor_limb,
                                                  divisor_limb_inverted);
                                n1 = n0;
                        }
                        UDIV_QRNND_PREINV(dummy, r, r,
                                          n1 << normalization_steps,
                                          divisor_limb, divisor_limb_inverted);
                        return r >> normalization_steps;
                } else {
                        mpi_limb_t divisor_limb_inverted;

                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.
                         * The result is a (N+1)-bit approximation to
                         * 1/DIVISOR_LIMB, with the most significant bit (with
                         * weight 2**N) implicit.
                         *
                         * Special case for DIVISOR_LIMB == 100...000.
                         */
                        if (!(divisor_limb << 1))
                                divisor_limb_inverted = ~(mpi_limb_t) 0;
                        else
                                udiv_qrnnd(divisor_limb_inverted, dummy,
                                           -divisor_limb, 0, divisor_limb);

                        i = dividend_size - 1;
                        r = dividend_ptr[i];
                        if (r >= divisor_limb)
                                r = 0;
                        else
                                i--;

                        for (; i >= 0; i--) {
                                n0 = dividend_ptr[i];
                                UDIV_QRNND_PREINV(dummy, r, r, n0,
                                                  divisor_limb,
                                                  divisor_limb_inverted);
                        }
                        return r;
                }
        } else {
                if (UDIV_NEEDS_NORMALIZATION) {
                        int normalization_steps;

                        count_leading_zeros(normalization_steps, divisor_limb);
                        if (normalization_steps) {
                                divisor_limb <<= normalization_steps;

                                n1 = dividend_ptr[dividend_size - 1];
                                r = n1 >> (BITS_PER_MPI_LIMB
                                           - normalization_steps);

                                /* Possible optimization:
                                 * if (r == 0
                                 *     && divisor_limb > ((n1 << normalization_steps)
                                 *         | (dividend_ptr[dividend_size - 2] >> ...)))
                                 * ...one division less...
                                 */
                                for (i = dividend_size - 2; i >= 0; i--) {
                                        n0 = dividend_ptr[i];
                                        udiv_qrnnd(dummy, r, r,
                                                   ((n1 << normalization_steps)
                                                    | (n0 >> (BITS_PER_MPI_LIMB
                                                              - normalization_steps))),
                                                   divisor_limb);
                                        n1 = n0;
                                }
                                udiv_qrnnd(dummy, r, r,
                                           n1 << normalization_steps,
                                           divisor_limb);
                                return r >> normalization_steps;
                        }
                }
                /* No normalization needed, either because udiv_qrnnd doesn't
                 * require it, or because DIVISOR_LIMB is already normalized.
                 */
                i = dividend_size - 1;
                r = dividend_ptr[i];
                if (r >= divisor_limb)
                        r = 0;
                else
                        i--;

                for (; i >= 0; i--) {
                        n0 = dividend_ptr[i];
                        udiv_qrnnd(dummy, r, r, n0, divisor_limb);
                }
                return r;
        }
}
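
/* A minimal usage sketch for mpihelp_mod_1 (hypothetical values; limb
 * arrays are least-significant-limb first, and the divisor needs no
 * normalization here):
 *
 *      mpi_limb_t a[2] = { 7, 1 };     // 1 * 2**BITS_PER_MPI_LIMB + 7
 *      mpi_limb_t rem = mpihelp_mod_1(a, 2, 10);
 *      // rem == a mod 10; the dividend array is left unmodified
 */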

/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
 * the NSIZE-DSIZE least significant quotient limbs at QP
 * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
 * non-zero, generate that many fraction limbs and append them after the
 * other quotient limbs.
 * Return the most significant limb of the quotient; this is always 0 or 1.
 *
 * Preconditions:
 * 0. NSIZE >= DSIZE.
 * 1. The most significant bit of the divisor must be set.
 * 2. QP must either not overlap with the input operands at all, or
 *    QP + DSIZE >= NP must hold true.  (This means that it's possible to
 *    put the quotient in the high part of NUM, right after the remainder
 *    in NUM.)
 * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
 */
mpi_limb_t
mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
               mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
{
        mpi_limb_t most_significant_q_limb = 0;

        switch (dsize) {
        case 0:
                /* We are asked to divide by zero, so go ahead and do it!  (To
                 * make the compiler not remove this statement, return the
                 * value.)
                 */
                /*
                 * existing clients of this function have been modified
                 * not to call it with dsize == 0, so this should not happen
                 */
                return 1 / dsize;

        case 1:
                {
                        mpi_size_t i;
                        mpi_limb_t n1;
                        mpi_limb_t d;

                        d = dp[0];
                        n1 = np[nsize - 1];

                        if (n1 >= d) {
                                n1 -= d;
                                most_significant_q_limb = 1;
                        }

                        qp += qextra_limbs;
                        for (i = nsize - 2; i >= 0; i--)
                                udiv_qrnnd(qp[i], n1, n1, np[i], d);
                        qp -= qextra_limbs;

                        for (i = qextra_limbs - 1; i >= 0; i--)
                                udiv_qrnnd(qp[i], n1, n1, 0, d);

                        np[0] = n1;
                }
                break;

        case 2:
                {
                        mpi_size_t i;
                        mpi_limb_t n1, n0, n2;
                        mpi_limb_t d1, d0;

                        np += nsize - 2;
                        d1 = dp[1];
                        d0 = dp[0];
                        n1 = np[1];
                        n0 = np[0];

                        if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
                                sub_ddmmss(n1, n0, n1, n0, d1, d0);
                                most_significant_q_limb = 1;
                        }

                        for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
                                mpi_limb_t q;
                                mpi_limb_t r;

                                if (i >= qextra_limbs)
                                        np--;
                                else
                                        np[0] = 0;

                                if (n1 == d1) {
                                        /* Q should be either 111..111 or
                                         * 111..110.  Need special treatment of
                                         * this rare case as normal division
                                         * would give overflow.
                                         */
                                        q = ~(mpi_limb_t) 0;

                                        r = n0 + d1;
                                        if (r < d1) {   /* Carry in the addition? */
                                                add_ssaaaa(n1, n0, r - d0,
                                                           np[0], 0, d0);
                                                qp[i] = q;
                                                continue;
                                        }
                                        n1 = d0 - (d0 != 0 ? 1 : 0);
                                        n0 = -d0;
                                } else {
                                        udiv_qrnnd(q, r, n1, n0, d1);
                                        umul_ppmm(n1, n0, d0, q);
                                }

                                n2 = np[0];
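                                /* Standard trial-quotient correction (cf.
                                 * Knuth, TAOCP vol. 2, Algorithm D): since
                                 * the divisor is normalized, the estimate q
                                 * is at most 2 too large, so the loop below
                                 * decrements it at most twice.
                                 */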
q_test:
                                if (n1 > r || (n1 == r && n0 > n2)) {
                                        /* The estimated Q was too large. */
                                        q--;
                                        sub_ddmmss(n1, n0, n1, n0, 0, d0);
                                        r += d1;
                                        if (r >= d1)    /* If not carry, test Q again. */
                                                goto q_test;
                                }

                                qp[i] = q;
                                sub_ddmmss(n1, n0, r, n2, n1, n0);
                        }
                        np[1] = n1;
                        np[0] = n0;
                }
                break;

        default:
                {
                        mpi_size_t i;
                        mpi_limb_t dX, d1, n0;

                        np += nsize - dsize;
                        dX = dp[dsize - 1];
                        d1 = dp[dsize - 2];
                        n0 = np[dsize - 1];

                        if (n0 >= dX) {
                                if (n0 > dX
                                    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
                                        mpihelp_sub_n(np, np, dp, dsize);
                                        n0 = np[dsize - 1];
                                        most_significant_q_limb = 1;
                                }
                        }

                        for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
                                mpi_limb_t q;
                                mpi_limb_t n1, n2;
                                mpi_limb_t cy_limb;

                                if (i >= qextra_limbs) {
                                        np--;
                                        n2 = np[dsize];
                                } else {
                                        n2 = np[dsize - 1];
                                        MPN_COPY_DECR(np + 1, np, dsize - 1);
                                        np[0] = 0;
                                }

                                if (n0 == dX) {
                                        /* This might over-estimate q, but it's
                                         * probably not worth the extra code
                                         * here to find out.
                                         */
                                        q = ~(mpi_limb_t) 0;
                                } else {
                                        mpi_limb_t r;

                                        udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
                                        umul_ppmm(n1, n0, d1, q);

                                        while (n1 > r
                                               || (n1 == r
                                                   && n0 > np[dsize - 2])) {
                                                q--;
                                                r += dX;
                                                if (r < dX)     /* I.e. "carry in previous addition?" */
                                                        break;
                                                n1 -= n0 < d1;
                                                n0 -= d1;
                                        }
                                }

                                /* Possible optimization: We already have
                                 * (q * n0) and (1 * n1) after the calculation
                                 * of q.  Taking advantage of that, we could
                                 * make this loop make two iterations less.
                                 */
                                cy_limb = mpihelp_submul_1(np, dp, dsize, q);

                                if (n2 != cy_limb) {
                                        mpihelp_add_n(np, np, dp, dsize);
                                        q--;
                                }

                                qp[i] = q;
                                n0 = np[dsize - 1];
                        }
                }
        }

        return most_significant_q_limb;
}
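
/* A minimal usage sketch for mpihelp_divrem (hypothetical sizes; per the
 * preconditions above, the divisor's top limb must have its most
 * significant bit set):
 *
 *      mpi_limb_t n[4] = { ... };      // numerator, LS limb first
 *      mpi_limb_t d[2] = { ... };      // normalized divisor
 *      mpi_limb_t q[2];                // nsize - dsize quotient limbs
 *      mpi_limb_t qhi = mpihelp_divrem(q, 0, n, 4, d, 2);
 *      // quotient = qhi:q[1]:q[0] (qhi is 0 or 1); the 2-limb remainder
 *      // is left in n[0..1]
 */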

/****************
 * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
 * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
 * Return the single-limb remainder.
 * There are no constraints on the value of the divisor.
 *
 * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
 */
mpi_limb_t
mpihelp_divmod_1(mpi_ptr_t quot_ptr,
                 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
                 mpi_limb_t divisor_limb)
{
        mpi_size_t i;
        mpi_limb_t n1, n0, r;
        int dummy;

        if (!dividend_size)
                return 0;

        /* If multiplication is much faster than division, and the
         * dividend is large, pre-invert the divisor, and use
         * only multiplications in the inner loop.
         *
         * This test should be read:
         * Does it ever help to use udiv_qrnnd_preinv?
         * && Does what we save compensate for the inversion overhead?
         */
        if (UDIV_TIME > (2 * UMUL_TIME + 6)
            && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
                int normalization_steps;

                count_leading_zeros(normalization_steps, divisor_limb);
                if (normalization_steps) {
                        mpi_limb_t divisor_limb_inverted;

                        divisor_limb <<= normalization_steps;

                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.
                         * The result is a (N+1)-bit approximation to
                         * 1/DIVISOR_LIMB, with the most significant bit (with
                         * weight 2**N) implicit.
                         */
                        /* Special case for DIVISOR_LIMB == 100...000. */
                        if (!(divisor_limb << 1))
                                divisor_limb_inverted = ~(mpi_limb_t) 0;
                        else
                                udiv_qrnnd(divisor_limb_inverted, dummy,
                                           -divisor_limb, 0, divisor_limb);

                        n1 = dividend_ptr[dividend_size - 1];
                        r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);

                        /* Possible optimization:
                         * if (r == 0
                         *     && divisor_limb > ((n1 << normalization_steps)
                         *         | (dividend_ptr[dividend_size - 2] >> ...)))
                         * ...one division less...
                         */
                        for (i = dividend_size - 2; i >= 0; i--) {
                                n0 = dividend_ptr[i];
                                UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
                                                  ((n1 << normalization_steps)
                                                   | (n0 >> (BITS_PER_MPI_LIMB
                                                             - normalization_steps))),
                                                  divisor_limb,
                                                  divisor_limb_inverted);
                                n1 = n0;
                        }
                        UDIV_QRNND_PREINV(quot_ptr[0], r, r,
                                          n1 << normalization_steps,
                                          divisor_limb, divisor_limb_inverted);
                        return r >> normalization_steps;
                } else {
                        mpi_limb_t divisor_limb_inverted;

                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.
                         * The result is a (N+1)-bit approximation to
                         * 1/DIVISOR_LIMB, with the most significant bit (with
                         * weight 2**N) implicit.
                         */
                        /* Special case for DIVISOR_LIMB == 100...000. */
                        if (!(divisor_limb << 1))
                                divisor_limb_inverted = ~(mpi_limb_t) 0;
                        else
                                udiv_qrnnd(divisor_limb_inverted, dummy,
                                           -divisor_limb, 0, divisor_limb);

                        i = dividend_size - 1;
                        r = dividend_ptr[i];
                        if (r >= divisor_limb)
                                r = 0;
                        else
                                quot_ptr[i--] = 0;

                        for (; i >= 0; i--) {
                                n0 = dividend_ptr[i];
                                UDIV_QRNND_PREINV(quot_ptr[i], r, r, n0,
                                                  divisor_limb,
                                                  divisor_limb_inverted);
                        }
                        return r;
                }
        } else {
                if (UDIV_NEEDS_NORMALIZATION) {
                        int normalization_steps;

                        count_leading_zeros(normalization_steps, divisor_limb);
                        if (normalization_steps) {
                                divisor_limb <<= normalization_steps;

                                n1 = dividend_ptr[dividend_size - 1];
                                r = n1 >> (BITS_PER_MPI_LIMB
                                           - normalization_steps);

                                /* Possible optimization:
                                 * if (r == 0
                                 *     && divisor_limb > ((n1 << normalization_steps)
                                 *         | (dividend_ptr[dividend_size - 2] >> ...)))
                                 * ...one division less...
                                 */
                                for (i = dividend_size - 2; i >= 0; i--) {
                                        n0 = dividend_ptr[i];
                                        udiv_qrnnd(quot_ptr[i + 1], r, r,
                                                   ((n1 << normalization_steps)
                                                    | (n0 >> (BITS_PER_MPI_LIMB
                                                              - normalization_steps))),
                                                   divisor_limb);
                                        n1 = n0;
                                }
                                udiv_qrnnd(quot_ptr[0], r, r,
                                           n1 << normalization_steps,
                                           divisor_limb);
                                return r >> normalization_steps;
                        }
                }
                /* No normalization needed, either because udiv_qrnnd doesn't
                 * require it, or because DIVISOR_LIMB is already normalized.
                 */
                i = dividend_size - 1;
                r = dividend_ptr[i];
                if (r >= divisor_limb)
                        r = 0;
                else
                        quot_ptr[i--] = 0;

                for (; i >= 0; i--) {
                        n0 = dividend_ptr[i];
                        udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
                }
                return r;
        }
}
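
/* A minimal usage sketch for mpihelp_divmod_1 (hypothetical values; unlike
 * mpihelp_divrem the divisor needs no normalization, and QUOT_PTR may point
 * to the same limbs as the dividend):
 *
 *      mpi_limb_t a[3] = { ... };      // dividend, LS limb first
 *      mpi_limb_t q[3];                // dividend_size quotient limbs
 *      mpi_limb_t rem = mpihelp_divmod_1(q, a, 3, 10);
 *      // q[0..2] == a / 10, rem == a % 10
 */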