fe.c 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492
  1. #include "fixedint.h"
  2. #include "fe.h"
  3. /*
  4. helper functions
  5. */
  6. static uint64_t load_3(const unsigned char *in) {
  7. uint64_t result;
  8. result = (uint64_t) in[0];
  9. result |= ((uint64_t) in[1]) << 8;
  10. result |= ((uint64_t) in[2]) << 16;
  11. return result;
  12. }
  13. static uint64_t load_4(const unsigned char *in) {
  14. uint64_t result;
  15. result = (uint64_t) in[0];
  16. result |= ((uint64_t) in[1]) << 8;
  17. result |= ((uint64_t) in[2]) << 16;
  18. result |= ((uint64_t) in[3]) << 24;
  19. return result;
  20. }
  21. /*
  22. h = 0
  23. */
  24. void fe_0(fe h) {
  25. h[0] = 0;
  26. h[1] = 0;
  27. h[2] = 0;
  28. h[3] = 0;
  29. h[4] = 0;
  30. h[5] = 0;
  31. h[6] = 0;
  32. h[7] = 0;
  33. h[8] = 0;
  34. h[9] = 0;
  35. }
  36. /*
  37. h = 1
  38. */
  39. void fe_1(fe h) {
  40. h[0] = 1;
  41. h[1] = 0;
  42. h[2] = 0;
  43. h[3] = 0;
  44. h[4] = 0;
  45. h[5] = 0;
  46. h[6] = 0;
  47. h[7] = 0;
  48. h[8] = 0;
  49. h[9] = 0;
  50. }
  51. /*
  52. h = f + g
  53. Can overlap h with f or g.
  54. Preconditions:
  55. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  56. |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  57. Postconditions:
  58. |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  59. */
  60. void fe_add(fe h, const fe f, const fe g) {
  61. int32_t f0 = f[0];
  62. int32_t f1 = f[1];
  63. int32_t f2 = f[2];
  64. int32_t f3 = f[3];
  65. int32_t f4 = f[4];
  66. int32_t f5 = f[5];
  67. int32_t f6 = f[6];
  68. int32_t f7 = f[7];
  69. int32_t f8 = f[8];
  70. int32_t f9 = f[9];
  71. int32_t g0 = g[0];
  72. int32_t g1 = g[1];
  73. int32_t g2 = g[2];
  74. int32_t g3 = g[3];
  75. int32_t g4 = g[4];
  76. int32_t g5 = g[5];
  77. int32_t g6 = g[6];
  78. int32_t g7 = g[7];
  79. int32_t g8 = g[8];
  80. int32_t g9 = g[9];
  81. int32_t h0 = f0 + g0;
  82. int32_t h1 = f1 + g1;
  83. int32_t h2 = f2 + g2;
  84. int32_t h3 = f3 + g3;
  85. int32_t h4 = f4 + g4;
  86. int32_t h5 = f5 + g5;
  87. int32_t h6 = f6 + g6;
  88. int32_t h7 = f7 + g7;
  89. int32_t h8 = f8 + g8;
  90. int32_t h9 = f9 + g9;
  91. h[0] = h0;
  92. h[1] = h1;
  93. h[2] = h2;
  94. h[3] = h3;
  95. h[4] = h4;
  96. h[5] = h5;
  97. h[6] = h6;
  98. h[7] = h7;
  99. h[8] = h8;
  100. h[9] = h9;
  101. }
  102. /*
  103. Replace (f,g) with (g,g) if b == 1;
  104. replace (f,g) with (f,g) if b == 0.
  105. Preconditions: b in {0,1}.
  106. */
  107. void fe_cmov(fe f, const fe g, unsigned int b) {
  108. int32_t f0 = f[0];
  109. int32_t f1 = f[1];
  110. int32_t f2 = f[2];
  111. int32_t f3 = f[3];
  112. int32_t f4 = f[4];
  113. int32_t f5 = f[5];
  114. int32_t f6 = f[6];
  115. int32_t f7 = f[7];
  116. int32_t f8 = f[8];
  117. int32_t f9 = f[9];
  118. int32_t g0 = g[0];
  119. int32_t g1 = g[1];
  120. int32_t g2 = g[2];
  121. int32_t g3 = g[3];
  122. int32_t g4 = g[4];
  123. int32_t g5 = g[5];
  124. int32_t g6 = g[6];
  125. int32_t g7 = g[7];
  126. int32_t g8 = g[8];
  127. int32_t g9 = g[9];
  128. int32_t x0 = f0 ^ g0;
  129. int32_t x1 = f1 ^ g1;
  130. int32_t x2 = f2 ^ g2;
  131. int32_t x3 = f3 ^ g3;
  132. int32_t x4 = f4 ^ g4;
  133. int32_t x5 = f5 ^ g5;
  134. int32_t x6 = f6 ^ g6;
  135. int32_t x7 = f7 ^ g7;
  136. int32_t x8 = f8 ^ g8;
  137. int32_t x9 = f9 ^ g9;
  138. b = (unsigned int) (- (int) b); /* silence warning */
  139. x0 &= b;
  140. x1 &= b;
  141. x2 &= b;
  142. x3 &= b;
  143. x4 &= b;
  144. x5 &= b;
  145. x6 &= b;
  146. x7 &= b;
  147. x8 &= b;
  148. x9 &= b;
  149. f[0] = f0 ^ x0;
  150. f[1] = f1 ^ x1;
  151. f[2] = f2 ^ x2;
  152. f[3] = f3 ^ x3;
  153. f[4] = f4 ^ x4;
  154. f[5] = f5 ^ x5;
  155. f[6] = f6 ^ x6;
  156. f[7] = f7 ^ x7;
  157. f[8] = f8 ^ x8;
  158. f[9] = f9 ^ x9;
  159. }
  160. /*
  161. Replace (f,g) with (g,f) if b == 1;
  162. replace (f,g) with (f,g) if b == 0.
  163. Preconditions: b in {0,1}.
  164. */
  165. void fe_cswap(fe f,fe g,unsigned int b) {
  166. int32_t f0 = f[0];
  167. int32_t f1 = f[1];
  168. int32_t f2 = f[2];
  169. int32_t f3 = f[3];
  170. int32_t f4 = f[4];
  171. int32_t f5 = f[5];
  172. int32_t f6 = f[6];
  173. int32_t f7 = f[7];
  174. int32_t f8 = f[8];
  175. int32_t f9 = f[9];
  176. int32_t g0 = g[0];
  177. int32_t g1 = g[1];
  178. int32_t g2 = g[2];
  179. int32_t g3 = g[3];
  180. int32_t g4 = g[4];
  181. int32_t g5 = g[5];
  182. int32_t g6 = g[6];
  183. int32_t g7 = g[7];
  184. int32_t g8 = g[8];
  185. int32_t g9 = g[9];
  186. int32_t x0 = f0 ^ g0;
  187. int32_t x1 = f1 ^ g1;
  188. int32_t x2 = f2 ^ g2;
  189. int32_t x3 = f3 ^ g3;
  190. int32_t x4 = f4 ^ g4;
  191. int32_t x5 = f5 ^ g5;
  192. int32_t x6 = f6 ^ g6;
  193. int32_t x7 = f7 ^ g7;
  194. int32_t x8 = f8 ^ g8;
  195. int32_t x9 = f9 ^ g9;
  196. b = (unsigned int) (- (int) b); /* silence warning */
  197. x0 &= b;
  198. x1 &= b;
  199. x2 &= b;
  200. x3 &= b;
  201. x4 &= b;
  202. x5 &= b;
  203. x6 &= b;
  204. x7 &= b;
  205. x8 &= b;
  206. x9 &= b;
  207. f[0] = f0 ^ x0;
  208. f[1] = f1 ^ x1;
  209. f[2] = f2 ^ x2;
  210. f[3] = f3 ^ x3;
  211. f[4] = f4 ^ x4;
  212. f[5] = f5 ^ x5;
  213. f[6] = f6 ^ x6;
  214. f[7] = f7 ^ x7;
  215. f[8] = f8 ^ x8;
  216. f[9] = f9 ^ x9;
  217. g[0] = g0 ^ x0;
  218. g[1] = g1 ^ x1;
  219. g[2] = g2 ^ x2;
  220. g[3] = g3 ^ x3;
  221. g[4] = g4 ^ x4;
  222. g[5] = g5 ^ x5;
  223. g[6] = g6 ^ x6;
  224. g[7] = g7 ^ x7;
  225. g[8] = g8 ^ x8;
  226. g[9] = g9 ^ x9;
  227. }
  228. /*
  229. h = f
  230. */
  231. void fe_copy(fe h, const fe f) {
  232. int32_t f0 = f[0];
  233. int32_t f1 = f[1];
  234. int32_t f2 = f[2];
  235. int32_t f3 = f[3];
  236. int32_t f4 = f[4];
  237. int32_t f5 = f[5];
  238. int32_t f6 = f[6];
  239. int32_t f7 = f[7];
  240. int32_t f8 = f[8];
  241. int32_t f9 = f[9];
  242. h[0] = f0;
  243. h[1] = f1;
  244. h[2] = f2;
  245. h[3] = f3;
  246. h[4] = f4;
  247. h[5] = f5;
  248. h[6] = f6;
  249. h[7] = f7;
  250. h[8] = f8;
  251. h[9] = f9;
  252. }
  253. /*
  254. Ignores top bit of h.
  255. */
  256. void fe_frombytes(fe h, const unsigned char *s) {
  257. int64_t h0 = load_4(s);
  258. int64_t h1 = load_3(s + 4) << 6;
  259. int64_t h2 = load_3(s + 7) << 5;
  260. int64_t h3 = load_3(s + 10) << 3;
  261. int64_t h4 = load_3(s + 13) << 2;
  262. int64_t h5 = load_4(s + 16);
  263. int64_t h6 = load_3(s + 20) << 7;
  264. int64_t h7 = load_3(s + 23) << 5;
  265. int64_t h8 = load_3(s + 26) << 4;
  266. int64_t h9 = (load_3(s + 29) & 8388607) << 2;
  267. int64_t carry0;
  268. int64_t carry1;
  269. int64_t carry2;
  270. int64_t carry3;
  271. int64_t carry4;
  272. int64_t carry5;
  273. int64_t carry6;
  274. int64_t carry7;
  275. int64_t carry8;
  276. int64_t carry9;
  277. carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
  278. h0 += carry9 * 19;
  279. h9 -= carry9 << 25;
  280. carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
  281. h2 += carry1;
  282. h1 -= carry1 << 25;
  283. carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
  284. h4 += carry3;
  285. h3 -= carry3 << 25;
  286. carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
  287. h6 += carry5;
  288. h5 -= carry5 << 25;
  289. carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
  290. h8 += carry7;
  291. h7 -= carry7 << 25;
  292. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  293. h1 += carry0;
  294. h0 -= carry0 << 26;
  295. carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
  296. h3 += carry2;
  297. h2 -= carry2 << 26;
  298. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  299. h5 += carry4;
  300. h4 -= carry4 << 26;
  301. carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
  302. h7 += carry6;
  303. h6 -= carry6 << 26;
  304. carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
  305. h9 += carry8;
  306. h8 -= carry8 << 26;
  307. h[0] = (int32_t) h0;
  308. h[1] = (int32_t) h1;
  309. h[2] = (int32_t) h2;
  310. h[3] = (int32_t) h3;
  311. h[4] = (int32_t) h4;
  312. h[5] = (int32_t) h5;
  313. h[6] = (int32_t) h6;
  314. h[7] = (int32_t) h7;
  315. h[8] = (int32_t) h8;
  316. h[9] = (int32_t) h9;
  317. }
  318. void fe_invert(fe out, const fe z) {
  319. fe t0;
  320. fe t1;
  321. fe t2;
  322. fe t3;
  323. int i;
  324. fe_sq(t0, z);
  325. for (i = 1; i < 1; ++i) {
  326. fe_sq(t0, t0);
  327. }
  328. fe_sq(t1, t0);
  329. for (i = 1; i < 2; ++i) {
  330. fe_sq(t1, t1);
  331. }
  332. fe_mul(t1, z, t1);
  333. fe_mul(t0, t0, t1);
  334. fe_sq(t2, t0);
  335. for (i = 1; i < 1; ++i) {
  336. fe_sq(t2, t2);
  337. }
  338. fe_mul(t1, t1, t2);
  339. fe_sq(t2, t1);
  340. for (i = 1; i < 5; ++i) {
  341. fe_sq(t2, t2);
  342. }
  343. fe_mul(t1, t2, t1);
  344. fe_sq(t2, t1);
  345. for (i = 1; i < 10; ++i) {
  346. fe_sq(t2, t2);
  347. }
  348. fe_mul(t2, t2, t1);
  349. fe_sq(t3, t2);
  350. for (i = 1; i < 20; ++i) {
  351. fe_sq(t3, t3);
  352. }
  353. fe_mul(t2, t3, t2);
  354. fe_sq(t2, t2);
  355. for (i = 1; i < 10; ++i) {
  356. fe_sq(t2, t2);
  357. }
  358. fe_mul(t1, t2, t1);
  359. fe_sq(t2, t1);
  360. for (i = 1; i < 50; ++i) {
  361. fe_sq(t2, t2);
  362. }
  363. fe_mul(t2, t2, t1);
  364. fe_sq(t3, t2);
  365. for (i = 1; i < 100; ++i) {
  366. fe_sq(t3, t3);
  367. }
  368. fe_mul(t2, t3, t2);
  369. fe_sq(t2, t2);
  370. for (i = 1; i < 50; ++i) {
  371. fe_sq(t2, t2);
  372. }
  373. fe_mul(t1, t2, t1);
  374. fe_sq(t1, t1);
  375. for (i = 1; i < 5; ++i) {
  376. fe_sq(t1, t1);
  377. }
  378. fe_mul(out, t1, t0);
  379. }
  380. /*
  381. return 1 if f is in {1,3,5,...,q-2}
  382. return 0 if f is in {0,2,4,...,q-1}
  383. Preconditions:
  384. |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  385. */
  386. int fe_isnegative(const fe f) {
  387. unsigned char s[32];
  388. fe_tobytes(s, f);
  389. return s[0] & 1;
  390. }
  391. /*
  392. return 1 if f == 0
  393. return 0 if f != 0
  394. Preconditions:
  395. |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  396. */
  397. int fe_isnonzero(const fe f) {
  398. unsigned char s[32];
  399. unsigned char r;
  400. fe_tobytes(s, f);
  401. r = s[0];
  402. #define F(i) r |= s[i]
  403. F(1);
  404. F(2);
  405. F(3);
  406. F(4);
  407. F(5);
  408. F(6);
  409. F(7);
  410. F(8);
  411. F(9);
  412. F(10);
  413. F(11);
  414. F(12);
  415. F(13);
  416. F(14);
  417. F(15);
  418. F(16);
  419. F(17);
  420. F(18);
  421. F(19);
  422. F(20);
  423. F(21);
  424. F(22);
  425. F(23);
  426. F(24);
  427. F(25);
  428. F(26);
  429. F(27);
  430. F(28);
  431. F(29);
  432. F(30);
  433. F(31);
  434. #undef F
  435. return r != 0;
  436. }
  437. /*
  438. h = f * g
  439. Can overlap h with f or g.
  440. Preconditions:
  441. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  442. |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  443. Postconditions:
  444. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  445. */
  446. /*
  447. Notes on implementation strategy:
  448. Using schoolbook multiplication.
  449. Karatsuba would save a little in some cost models.
  450. Most multiplications by 2 and 19 are 32-bit precomputations;
  451. cheaper than 64-bit postcomputations.
  452. There is one remaining multiplication by 19 in the carry chain;
  453. one *19 precomputation can be merged into this,
  454. but the resulting data flow is considerably less clean.
  455. There are 12 carries below.
  456. 10 of them are 2-way parallelizable and vectorizable.
  457. Can get away with 11 carries, but then data flow is much deeper.
  458. With tighter constraints on inputs can squeeze carries into int32.
  459. */
  460. void fe_mul(fe h, const fe f, const fe g) {
  461. int32_t f0 = f[0];
  462. int32_t f1 = f[1];
  463. int32_t f2 = f[2];
  464. int32_t f3 = f[3];
  465. int32_t f4 = f[4];
  466. int32_t f5 = f[5];
  467. int32_t f6 = f[6];
  468. int32_t f7 = f[7];
  469. int32_t f8 = f[8];
  470. int32_t f9 = f[9];
  471. int32_t g0 = g[0];
  472. int32_t g1 = g[1];
  473. int32_t g2 = g[2];
  474. int32_t g3 = g[3];
  475. int32_t g4 = g[4];
  476. int32_t g5 = g[5];
  477. int32_t g6 = g[6];
  478. int32_t g7 = g[7];
  479. int32_t g8 = g[8];
  480. int32_t g9 = g[9];
  481. int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
  482. int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
  483. int32_t g3_19 = 19 * g3;
  484. int32_t g4_19 = 19 * g4;
  485. int32_t g5_19 = 19 * g5;
  486. int32_t g6_19 = 19 * g6;
  487. int32_t g7_19 = 19 * g7;
  488. int32_t g8_19 = 19 * g8;
  489. int32_t g9_19 = 19 * g9;
  490. int32_t f1_2 = 2 * f1;
  491. int32_t f3_2 = 2 * f3;
  492. int32_t f5_2 = 2 * f5;
  493. int32_t f7_2 = 2 * f7;
  494. int32_t f9_2 = 2 * f9;
  495. int64_t f0g0 = f0 * (int64_t) g0;
  496. int64_t f0g1 = f0 * (int64_t) g1;
  497. int64_t f0g2 = f0 * (int64_t) g2;
  498. int64_t f0g3 = f0 * (int64_t) g3;
  499. int64_t f0g4 = f0 * (int64_t) g4;
  500. int64_t f0g5 = f0 * (int64_t) g5;
  501. int64_t f0g6 = f0 * (int64_t) g6;
  502. int64_t f0g7 = f0 * (int64_t) g7;
  503. int64_t f0g8 = f0 * (int64_t) g8;
  504. int64_t f0g9 = f0 * (int64_t) g9;
  505. int64_t f1g0 = f1 * (int64_t) g0;
  506. int64_t f1g1_2 = f1_2 * (int64_t) g1;
  507. int64_t f1g2 = f1 * (int64_t) g2;
  508. int64_t f1g3_2 = f1_2 * (int64_t) g3;
  509. int64_t f1g4 = f1 * (int64_t) g4;
  510. int64_t f1g5_2 = f1_2 * (int64_t) g5;
  511. int64_t f1g6 = f1 * (int64_t) g6;
  512. int64_t f1g7_2 = f1_2 * (int64_t) g7;
  513. int64_t f1g8 = f1 * (int64_t) g8;
  514. int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
  515. int64_t f2g0 = f2 * (int64_t) g0;
  516. int64_t f2g1 = f2 * (int64_t) g1;
  517. int64_t f2g2 = f2 * (int64_t) g2;
  518. int64_t f2g3 = f2 * (int64_t) g3;
  519. int64_t f2g4 = f2 * (int64_t) g4;
  520. int64_t f2g5 = f2 * (int64_t) g5;
  521. int64_t f2g6 = f2 * (int64_t) g6;
  522. int64_t f2g7 = f2 * (int64_t) g7;
  523. int64_t f2g8_19 = f2 * (int64_t) g8_19;
  524. int64_t f2g9_19 = f2 * (int64_t) g9_19;
  525. int64_t f3g0 = f3 * (int64_t) g0;
  526. int64_t f3g1_2 = f3_2 * (int64_t) g1;
  527. int64_t f3g2 = f3 * (int64_t) g2;
  528. int64_t f3g3_2 = f3_2 * (int64_t) g3;
  529. int64_t f3g4 = f3 * (int64_t) g4;
  530. int64_t f3g5_2 = f3_2 * (int64_t) g5;
  531. int64_t f3g6 = f3 * (int64_t) g6;
  532. int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
  533. int64_t f3g8_19 = f3 * (int64_t) g8_19;
  534. int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
  535. int64_t f4g0 = f4 * (int64_t) g0;
  536. int64_t f4g1 = f4 * (int64_t) g1;
  537. int64_t f4g2 = f4 * (int64_t) g2;
  538. int64_t f4g3 = f4 * (int64_t) g3;
  539. int64_t f4g4 = f4 * (int64_t) g4;
  540. int64_t f4g5 = f4 * (int64_t) g5;
  541. int64_t f4g6_19 = f4 * (int64_t) g6_19;
  542. int64_t f4g7_19 = f4 * (int64_t) g7_19;
  543. int64_t f4g8_19 = f4 * (int64_t) g8_19;
  544. int64_t f4g9_19 = f4 * (int64_t) g9_19;
  545. int64_t f5g0 = f5 * (int64_t) g0;
  546. int64_t f5g1_2 = f5_2 * (int64_t) g1;
  547. int64_t f5g2 = f5 * (int64_t) g2;
  548. int64_t f5g3_2 = f5_2 * (int64_t) g3;
  549. int64_t f5g4 = f5 * (int64_t) g4;
  550. int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
  551. int64_t f5g6_19 = f5 * (int64_t) g6_19;
  552. int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
  553. int64_t f5g8_19 = f5 * (int64_t) g8_19;
  554. int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
  555. int64_t f6g0 = f6 * (int64_t) g0;
  556. int64_t f6g1 = f6 * (int64_t) g1;
  557. int64_t f6g2 = f6 * (int64_t) g2;
  558. int64_t f6g3 = f6 * (int64_t) g3;
  559. int64_t f6g4_19 = f6 * (int64_t) g4_19;
  560. int64_t f6g5_19 = f6 * (int64_t) g5_19;
  561. int64_t f6g6_19 = f6 * (int64_t) g6_19;
  562. int64_t f6g7_19 = f6 * (int64_t) g7_19;
  563. int64_t f6g8_19 = f6 * (int64_t) g8_19;
  564. int64_t f6g9_19 = f6 * (int64_t) g9_19;
  565. int64_t f7g0 = f7 * (int64_t) g0;
  566. int64_t f7g1_2 = f7_2 * (int64_t) g1;
  567. int64_t f7g2 = f7 * (int64_t) g2;
  568. int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
  569. int64_t f7g4_19 = f7 * (int64_t) g4_19;
  570. int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
  571. int64_t f7g6_19 = f7 * (int64_t) g6_19;
  572. int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
  573. int64_t f7g8_19 = f7 * (int64_t) g8_19;
  574. int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
  575. int64_t f8g0 = f8 * (int64_t) g0;
  576. int64_t f8g1 = f8 * (int64_t) g1;
  577. int64_t f8g2_19 = f8 * (int64_t) g2_19;
  578. int64_t f8g3_19 = f8 * (int64_t) g3_19;
  579. int64_t f8g4_19 = f8 * (int64_t) g4_19;
  580. int64_t f8g5_19 = f8 * (int64_t) g5_19;
  581. int64_t f8g6_19 = f8 * (int64_t) g6_19;
  582. int64_t f8g7_19 = f8 * (int64_t) g7_19;
  583. int64_t f8g8_19 = f8 * (int64_t) g8_19;
  584. int64_t f8g9_19 = f8 * (int64_t) g9_19;
  585. int64_t f9g0 = f9 * (int64_t) g0;
  586. int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
  587. int64_t f9g2_19 = f9 * (int64_t) g2_19;
  588. int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
  589. int64_t f9g4_19 = f9 * (int64_t) g4_19;
  590. int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
  591. int64_t f9g6_19 = f9 * (int64_t) g6_19;
  592. int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
  593. int64_t f9g8_19 = f9 * (int64_t) g8_19;
  594. int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
  595. int64_t h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
  596. int64_t h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
  597. int64_t h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
  598. int64_t h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
  599. int64_t h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
  600. int64_t h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
  601. int64_t h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
  602. int64_t h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
  603. int64_t h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
  604. int64_t h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0 ;
  605. int64_t carry0;
  606. int64_t carry1;
  607. int64_t carry2;
  608. int64_t carry3;
  609. int64_t carry4;
  610. int64_t carry5;
  611. int64_t carry6;
  612. int64_t carry7;
  613. int64_t carry8;
  614. int64_t carry9;
  615. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  616. h1 += carry0;
  617. h0 -= carry0 << 26;
  618. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  619. h5 += carry4;
  620. h4 -= carry4 << 26;
  621. carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
  622. h2 += carry1;
  623. h1 -= carry1 << 25;
  624. carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
  625. h6 += carry5;
  626. h5 -= carry5 << 25;
  627. carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
  628. h3 += carry2;
  629. h2 -= carry2 << 26;
  630. carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
  631. h7 += carry6;
  632. h6 -= carry6 << 26;
  633. carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
  634. h4 += carry3;
  635. h3 -= carry3 << 25;
  636. carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
  637. h8 += carry7;
  638. h7 -= carry7 << 25;
  639. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  640. h5 += carry4;
  641. h4 -= carry4 << 26;
  642. carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
  643. h9 += carry8;
  644. h8 -= carry8 << 26;
  645. carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
  646. h0 += carry9 * 19;
  647. h9 -= carry9 << 25;
  648. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  649. h1 += carry0;
  650. h0 -= carry0 << 26;
  651. h[0] = (int32_t) h0;
  652. h[1] = (int32_t) h1;
  653. h[2] = (int32_t) h2;
  654. h[3] = (int32_t) h3;
  655. h[4] = (int32_t) h4;
  656. h[5] = (int32_t) h5;
  657. h[6] = (int32_t) h6;
  658. h[7] = (int32_t) h7;
  659. h[8] = (int32_t) h8;
  660. h[9] = (int32_t) h9;
  661. }
  662. /*
  663. h = f * 121666
  664. Can overlap h with f.
  665. Preconditions:
  666. |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  667. Postconditions:
  668. |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  669. */
  670. void fe_mul121666(fe h, fe f) {
  671. int32_t f0 = f[0];
  672. int32_t f1 = f[1];
  673. int32_t f2 = f[2];
  674. int32_t f3 = f[3];
  675. int32_t f4 = f[4];
  676. int32_t f5 = f[5];
  677. int32_t f6 = f[6];
  678. int32_t f7 = f[7];
  679. int32_t f8 = f[8];
  680. int32_t f9 = f[9];
  681. int64_t h0 = f0 * (int64_t) 121666;
  682. int64_t h1 = f1 * (int64_t) 121666;
  683. int64_t h2 = f2 * (int64_t) 121666;
  684. int64_t h3 = f3 * (int64_t) 121666;
  685. int64_t h4 = f4 * (int64_t) 121666;
  686. int64_t h5 = f5 * (int64_t) 121666;
  687. int64_t h6 = f6 * (int64_t) 121666;
  688. int64_t h7 = f7 * (int64_t) 121666;
  689. int64_t h8 = f8 * (int64_t) 121666;
  690. int64_t h9 = f9 * (int64_t) 121666;
  691. int64_t carry0;
  692. int64_t carry1;
  693. int64_t carry2;
  694. int64_t carry3;
  695. int64_t carry4;
  696. int64_t carry5;
  697. int64_t carry6;
  698. int64_t carry7;
  699. int64_t carry8;
  700. int64_t carry9;
  701. carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
  702. carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
  703. carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
  704. carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
  705. carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
  706. carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
  707. carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
  708. carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
  709. carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
  710. carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
  711. h[0] = (int32_t) h0;
  712. h[1] = (int32_t) h1;
  713. h[2] = (int32_t) h2;
  714. h[3] = (int32_t) h3;
  715. h[4] = (int32_t) h4;
  716. h[5] = (int32_t) h5;
  717. h[6] = (int32_t) h6;
  718. h[7] = (int32_t) h7;
  719. h[8] = (int32_t) h8;
  720. h[9] = (int32_t) h9;
  721. }
  722. /*
  723. h = -f
  724. Preconditions:
  725. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  726. Postconditions:
  727. |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  728. */
  729. void fe_neg(fe h, const fe f) {
  730. int32_t f0 = f[0];
  731. int32_t f1 = f[1];
  732. int32_t f2 = f[2];
  733. int32_t f3 = f[3];
  734. int32_t f4 = f[4];
  735. int32_t f5 = f[5];
  736. int32_t f6 = f[6];
  737. int32_t f7 = f[7];
  738. int32_t f8 = f[8];
  739. int32_t f9 = f[9];
  740. int32_t h0 = -f0;
  741. int32_t h1 = -f1;
  742. int32_t h2 = -f2;
  743. int32_t h3 = -f3;
  744. int32_t h4 = -f4;
  745. int32_t h5 = -f5;
  746. int32_t h6 = -f6;
  747. int32_t h7 = -f7;
  748. int32_t h8 = -f8;
  749. int32_t h9 = -f9;
  750. h[0] = h0;
  751. h[1] = h1;
  752. h[2] = h2;
  753. h[3] = h3;
  754. h[4] = h4;
  755. h[5] = h5;
  756. h[6] = h6;
  757. h[7] = h7;
  758. h[8] = h8;
  759. h[9] = h9;
  760. }
  761. void fe_pow22523(fe out, const fe z) {
  762. fe t0;
  763. fe t1;
  764. fe t2;
  765. int i;
  766. fe_sq(t0, z);
  767. for (i = 1; i < 1; ++i) {
  768. fe_sq(t0, t0);
  769. }
  770. fe_sq(t1, t0);
  771. for (i = 1; i < 2; ++i) {
  772. fe_sq(t1, t1);
  773. }
  774. fe_mul(t1, z, t1);
  775. fe_mul(t0, t0, t1);
  776. fe_sq(t0, t0);
  777. for (i = 1; i < 1; ++i) {
  778. fe_sq(t0, t0);
  779. }
  780. fe_mul(t0, t1, t0);
  781. fe_sq(t1, t0);
  782. for (i = 1; i < 5; ++i) {
  783. fe_sq(t1, t1);
  784. }
  785. fe_mul(t0, t1, t0);
  786. fe_sq(t1, t0);
  787. for (i = 1; i < 10; ++i) {
  788. fe_sq(t1, t1);
  789. }
  790. fe_mul(t1, t1, t0);
  791. fe_sq(t2, t1);
  792. for (i = 1; i < 20; ++i) {
  793. fe_sq(t2, t2);
  794. }
  795. fe_mul(t1, t2, t1);
  796. fe_sq(t1, t1);
  797. for (i = 1; i < 10; ++i) {
  798. fe_sq(t1, t1);
  799. }
  800. fe_mul(t0, t1, t0);
  801. fe_sq(t1, t0);
  802. for (i = 1; i < 50; ++i) {
  803. fe_sq(t1, t1);
  804. }
  805. fe_mul(t1, t1, t0);
  806. fe_sq(t2, t1);
  807. for (i = 1; i < 100; ++i) {
  808. fe_sq(t2, t2);
  809. }
  810. fe_mul(t1, t2, t1);
  811. fe_sq(t1, t1);
  812. for (i = 1; i < 50; ++i) {
  813. fe_sq(t1, t1);
  814. }
  815. fe_mul(t0, t1, t0);
  816. fe_sq(t0, t0);
  817. for (i = 1; i < 2; ++i) {
  818. fe_sq(t0, t0);
  819. }
  820. fe_mul(out, t0, z);
  821. return;
  822. }
  823. /*
  824. h = f * f
  825. Can overlap h with f.
  826. Preconditions:
  827. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  828. Postconditions:
  829. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  830. */
  831. /*
  832. See fe_mul.c for discussion of implementation strategy.
  833. */
  834. void fe_sq(fe h, const fe f) {
  835. int32_t f0 = f[0];
  836. int32_t f1 = f[1];
  837. int32_t f2 = f[2];
  838. int32_t f3 = f[3];
  839. int32_t f4 = f[4];
  840. int32_t f5 = f[5];
  841. int32_t f6 = f[6];
  842. int32_t f7 = f[7];
  843. int32_t f8 = f[8];
  844. int32_t f9 = f[9];
  845. int32_t f0_2 = 2 * f0;
  846. int32_t f1_2 = 2 * f1;
  847. int32_t f2_2 = 2 * f2;
  848. int32_t f3_2 = 2 * f3;
  849. int32_t f4_2 = 2 * f4;
  850. int32_t f5_2 = 2 * f5;
  851. int32_t f6_2 = 2 * f6;
  852. int32_t f7_2 = 2 * f7;
  853. int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
  854. int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
  855. int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
  856. int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
  857. int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
  858. int64_t f0f0 = f0 * (int64_t) f0;
  859. int64_t f0f1_2 = f0_2 * (int64_t) f1;
  860. int64_t f0f2_2 = f0_2 * (int64_t) f2;
  861. int64_t f0f3_2 = f0_2 * (int64_t) f3;
  862. int64_t f0f4_2 = f0_2 * (int64_t) f4;
  863. int64_t f0f5_2 = f0_2 * (int64_t) f5;
  864. int64_t f0f6_2 = f0_2 * (int64_t) f6;
  865. int64_t f0f7_2 = f0_2 * (int64_t) f7;
  866. int64_t f0f8_2 = f0_2 * (int64_t) f8;
  867. int64_t f0f9_2 = f0_2 * (int64_t) f9;
  868. int64_t f1f1_2 = f1_2 * (int64_t) f1;
  869. int64_t f1f2_2 = f1_2 * (int64_t) f2;
  870. int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
  871. int64_t f1f4_2 = f1_2 * (int64_t) f4;
  872. int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
  873. int64_t f1f6_2 = f1_2 * (int64_t) f6;
  874. int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
  875. int64_t f1f8_2 = f1_2 * (int64_t) f8;
  876. int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
  877. int64_t f2f2 = f2 * (int64_t) f2;
  878. int64_t f2f3_2 = f2_2 * (int64_t) f3;
  879. int64_t f2f4_2 = f2_2 * (int64_t) f4;
  880. int64_t f2f5_2 = f2_2 * (int64_t) f5;
  881. int64_t f2f6_2 = f2_2 * (int64_t) f6;
  882. int64_t f2f7_2 = f2_2 * (int64_t) f7;
  883. int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
  884. int64_t f2f9_38 = f2 * (int64_t) f9_38;
  885. int64_t f3f3_2 = f3_2 * (int64_t) f3;
  886. int64_t f3f4_2 = f3_2 * (int64_t) f4;
  887. int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
  888. int64_t f3f6_2 = f3_2 * (int64_t) f6;
  889. int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
  890. int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
  891. int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
  892. int64_t f4f4 = f4 * (int64_t) f4;
  893. int64_t f4f5_2 = f4_2 * (int64_t) f5;
  894. int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
  895. int64_t f4f7_38 = f4 * (int64_t) f7_38;
  896. int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
  897. int64_t f4f9_38 = f4 * (int64_t) f9_38;
  898. int64_t f5f5_38 = f5 * (int64_t) f5_38;
  899. int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
  900. int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
  901. int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
  902. int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
  903. int64_t f6f6_19 = f6 * (int64_t) f6_19;
  904. int64_t f6f7_38 = f6 * (int64_t) f7_38;
  905. int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
  906. int64_t f6f9_38 = f6 * (int64_t) f9_38;
  907. int64_t f7f7_38 = f7 * (int64_t) f7_38;
  908. int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
  909. int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
  910. int64_t f8f8_19 = f8 * (int64_t) f8_19;
  911. int64_t f8f9_38 = f8 * (int64_t) f9_38;
  912. int64_t f9f9_38 = f9 * (int64_t) f9_38;
  913. int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
  914. int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
  915. int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
  916. int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
  917. int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
  918. int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
  919. int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
  920. int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
  921. int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
  922. int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
  923. int64_t carry0;
  924. int64_t carry1;
  925. int64_t carry2;
  926. int64_t carry3;
  927. int64_t carry4;
  928. int64_t carry5;
  929. int64_t carry6;
  930. int64_t carry7;
  931. int64_t carry8;
  932. int64_t carry9;
  933. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  934. h1 += carry0;
  935. h0 -= carry0 << 26;
  936. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  937. h5 += carry4;
  938. h4 -= carry4 << 26;
  939. carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
  940. h2 += carry1;
  941. h1 -= carry1 << 25;
  942. carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
  943. h6 += carry5;
  944. h5 -= carry5 << 25;
  945. carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
  946. h3 += carry2;
  947. h2 -= carry2 << 26;
  948. carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
  949. h7 += carry6;
  950. h6 -= carry6 << 26;
  951. carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
  952. h4 += carry3;
  953. h3 -= carry3 << 25;
  954. carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
  955. h8 += carry7;
  956. h7 -= carry7 << 25;
  957. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  958. h5 += carry4;
  959. h4 -= carry4 << 26;
  960. carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
  961. h9 += carry8;
  962. h8 -= carry8 << 26;
  963. carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
  964. h0 += carry9 * 19;
  965. h9 -= carry9 << 25;
  966. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  967. h1 += carry0;
  968. h0 -= carry0 << 26;
  969. h[0] = (int32_t) h0;
  970. h[1] = (int32_t) h1;
  971. h[2] = (int32_t) h2;
  972. h[3] = (int32_t) h3;
  973. h[4] = (int32_t) h4;
  974. h[5] = (int32_t) h5;
  975. h[6] = (int32_t) h6;
  976. h[7] = (int32_t) h7;
  977. h[8] = (int32_t) h8;
  978. h[9] = (int32_t) h9;
  979. }
  980. /*
  981. h = 2 * f * f
  982. Can overlap h with f.
  983. Preconditions:
  984. |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
  985. Postconditions:
  986. |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
  987. */
  988. /*
  989. See fe_mul.c for discussion of implementation strategy.
  990. */
  991. void fe_sq2(fe h, const fe f) {
  992. int32_t f0 = f[0];
  993. int32_t f1 = f[1];
  994. int32_t f2 = f[2];
  995. int32_t f3 = f[3];
  996. int32_t f4 = f[4];
  997. int32_t f5 = f[5];
  998. int32_t f6 = f[6];
  999. int32_t f7 = f[7];
  1000. int32_t f8 = f[8];
  1001. int32_t f9 = f[9];
  1002. int32_t f0_2 = 2 * f0;
  1003. int32_t f1_2 = 2 * f1;
  1004. int32_t f2_2 = 2 * f2;
  1005. int32_t f3_2 = 2 * f3;
  1006. int32_t f4_2 = 2 * f4;
  1007. int32_t f5_2 = 2 * f5;
  1008. int32_t f6_2 = 2 * f6;
  1009. int32_t f7_2 = 2 * f7;
  1010. int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
  1011. int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
  1012. int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
  1013. int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
  1014. int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
  1015. int64_t f0f0 = f0 * (int64_t) f0;
  1016. int64_t f0f1_2 = f0_2 * (int64_t) f1;
  1017. int64_t f0f2_2 = f0_2 * (int64_t) f2;
  1018. int64_t f0f3_2 = f0_2 * (int64_t) f3;
  1019. int64_t f0f4_2 = f0_2 * (int64_t) f4;
  1020. int64_t f0f5_2 = f0_2 * (int64_t) f5;
  1021. int64_t f0f6_2 = f0_2 * (int64_t) f6;
  1022. int64_t f0f7_2 = f0_2 * (int64_t) f7;
  1023. int64_t f0f8_2 = f0_2 * (int64_t) f8;
  1024. int64_t f0f9_2 = f0_2 * (int64_t) f9;
  1025. int64_t f1f1_2 = f1_2 * (int64_t) f1;
  1026. int64_t f1f2_2 = f1_2 * (int64_t) f2;
  1027. int64_t f1f3_4 = f1_2 * (int64_t) f3_2;
  1028. int64_t f1f4_2 = f1_2 * (int64_t) f4;
  1029. int64_t f1f5_4 = f1_2 * (int64_t) f5_2;
  1030. int64_t f1f6_2 = f1_2 * (int64_t) f6;
  1031. int64_t f1f7_4 = f1_2 * (int64_t) f7_2;
  1032. int64_t f1f8_2 = f1_2 * (int64_t) f8;
  1033. int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
  1034. int64_t f2f2 = f2 * (int64_t) f2;
  1035. int64_t f2f3_2 = f2_2 * (int64_t) f3;
  1036. int64_t f2f4_2 = f2_2 * (int64_t) f4;
  1037. int64_t f2f5_2 = f2_2 * (int64_t) f5;
  1038. int64_t f2f6_2 = f2_2 * (int64_t) f6;
  1039. int64_t f2f7_2 = f2_2 * (int64_t) f7;
  1040. int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
  1041. int64_t f2f9_38 = f2 * (int64_t) f9_38;
  1042. int64_t f3f3_2 = f3_2 * (int64_t) f3;
  1043. int64_t f3f4_2 = f3_2 * (int64_t) f4;
  1044. int64_t f3f5_4 = f3_2 * (int64_t) f5_2;
  1045. int64_t f3f6_2 = f3_2 * (int64_t) f6;
  1046. int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
  1047. int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
  1048. int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
  1049. int64_t f4f4 = f4 * (int64_t) f4;
  1050. int64_t f4f5_2 = f4_2 * (int64_t) f5;
  1051. int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
  1052. int64_t f4f7_38 = f4 * (int64_t) f7_38;
  1053. int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
  1054. int64_t f4f9_38 = f4 * (int64_t) f9_38;
  1055. int64_t f5f5_38 = f5 * (int64_t) f5_38;
  1056. int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
  1057. int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
  1058. int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
  1059. int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
  1060. int64_t f6f6_19 = f6 * (int64_t) f6_19;
  1061. int64_t f6f7_38 = f6 * (int64_t) f7_38;
  1062. int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
  1063. int64_t f6f9_38 = f6 * (int64_t) f9_38;
  1064. int64_t f7f7_38 = f7 * (int64_t) f7_38;
  1065. int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
  1066. int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
  1067. int64_t f8f8_19 = f8 * (int64_t) f8_19;
  1068. int64_t f8f9_38 = f8 * (int64_t) f9_38;
  1069. int64_t f9f9_38 = f9 * (int64_t) f9_38;
  1070. int64_t h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
  1071. int64_t h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
  1072. int64_t h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
  1073. int64_t h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
  1074. int64_t h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
  1075. int64_t h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
  1076. int64_t h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
  1077. int64_t h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
  1078. int64_t h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
  1079. int64_t h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
  1080. int64_t carry0;
  1081. int64_t carry1;
  1082. int64_t carry2;
  1083. int64_t carry3;
  1084. int64_t carry4;
  1085. int64_t carry5;
  1086. int64_t carry6;
  1087. int64_t carry7;
  1088. int64_t carry8;
  1089. int64_t carry9;
  1090. h0 += h0;
  1091. h1 += h1;
  1092. h2 += h2;
  1093. h3 += h3;
  1094. h4 += h4;
  1095. h5 += h5;
  1096. h6 += h6;
  1097. h7 += h7;
  1098. h8 += h8;
  1099. h9 += h9;
  1100. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  1101. h1 += carry0;
  1102. h0 -= carry0 << 26;
  1103. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  1104. h5 += carry4;
  1105. h4 -= carry4 << 26;
  1106. carry1 = (h1 + (int64_t) (1 << 24)) >> 25;
  1107. h2 += carry1;
  1108. h1 -= carry1 << 25;
  1109. carry5 = (h5 + (int64_t) (1 << 24)) >> 25;
  1110. h6 += carry5;
  1111. h5 -= carry5 << 25;
  1112. carry2 = (h2 + (int64_t) (1 << 25)) >> 26;
  1113. h3 += carry2;
  1114. h2 -= carry2 << 26;
  1115. carry6 = (h6 + (int64_t) (1 << 25)) >> 26;
  1116. h7 += carry6;
  1117. h6 -= carry6 << 26;
  1118. carry3 = (h3 + (int64_t) (1 << 24)) >> 25;
  1119. h4 += carry3;
  1120. h3 -= carry3 << 25;
  1121. carry7 = (h7 + (int64_t) (1 << 24)) >> 25;
  1122. h8 += carry7;
  1123. h7 -= carry7 << 25;
  1124. carry4 = (h4 + (int64_t) (1 << 25)) >> 26;
  1125. h5 += carry4;
  1126. h4 -= carry4 << 26;
  1127. carry8 = (h8 + (int64_t) (1 << 25)) >> 26;
  1128. h9 += carry8;
  1129. h8 -= carry8 << 26;
  1130. carry9 = (h9 + (int64_t) (1 << 24)) >> 25;
  1131. h0 += carry9 * 19;
  1132. h9 -= carry9 << 25;
  1133. carry0 = (h0 + (int64_t) (1 << 25)) >> 26;
  1134. h1 += carry0;
  1135. h0 -= carry0 << 26;
  1136. h[0] = (int32_t) h0;
  1137. h[1] = (int32_t) h1;
  1138. h[2] = (int32_t) h2;
  1139. h[3] = (int32_t) h3;
  1140. h[4] = (int32_t) h4;
  1141. h[5] = (int32_t) h5;
  1142. h[6] = (int32_t) h6;
  1143. h[7] = (int32_t) h7;
  1144. h[8] = (int32_t) h8;
  1145. h[9] = (int32_t) h9;
  1146. }
  1147. /*
  1148. h = f - g
  1149. Can overlap h with f or g.
  1150. Preconditions:
  1151. |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  1152. |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
  1153. Postconditions:
  1154. |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  1155. */
  1156. void fe_sub(fe h, const fe f, const fe g) {
  1157. int32_t f0 = f[0];
  1158. int32_t f1 = f[1];
  1159. int32_t f2 = f[2];
  1160. int32_t f3 = f[3];
  1161. int32_t f4 = f[4];
  1162. int32_t f5 = f[5];
  1163. int32_t f6 = f[6];
  1164. int32_t f7 = f[7];
  1165. int32_t f8 = f[8];
  1166. int32_t f9 = f[9];
  1167. int32_t g0 = g[0];
  1168. int32_t g1 = g[1];
  1169. int32_t g2 = g[2];
  1170. int32_t g3 = g[3];
  1171. int32_t g4 = g[4];
  1172. int32_t g5 = g[5];
  1173. int32_t g6 = g[6];
  1174. int32_t g7 = g[7];
  1175. int32_t g8 = g[8];
  1176. int32_t g9 = g[9];
  1177. int32_t h0 = f0 - g0;
  1178. int32_t h1 = f1 - g1;
  1179. int32_t h2 = f2 - g2;
  1180. int32_t h3 = f3 - g3;
  1181. int32_t h4 = f4 - g4;
  1182. int32_t h5 = f5 - g5;
  1183. int32_t h6 = f6 - g6;
  1184. int32_t h7 = f7 - g7;
  1185. int32_t h8 = f8 - g8;
  1186. int32_t h9 = f9 - g9;
  1187. h[0] = h0;
  1188. h[1] = h1;
  1189. h[2] = h2;
  1190. h[3] = h3;
  1191. h[4] = h4;
  1192. h[5] = h5;
  1193. h[6] = h6;
  1194. h[7] = h7;
  1195. h[8] = h8;
  1196. h[9] = h9;
  1197. }
  1198. /*
  1199. Preconditions:
  1200. |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
  1201. Write p=2^255-19; q=floor(h/p).
  1202. Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
  1203. Proof:
  1204. Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
  1205. Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
  1206. Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
  1207. Then 0<y<1.
  1208. Write r=h-pq.
  1209. Have 0<=r<=p-1=2^255-20.
  1210. Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
  1211. Write x=r+19(2^-255)r+y.
  1212. Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
  1213. Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
  1214. so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
  1215. */
  1216. void fe_tobytes(unsigned char *s, const fe h) {
  1217. int32_t h0 = h[0];
  1218. int32_t h1 = h[1];
  1219. int32_t h2 = h[2];
  1220. int32_t h3 = h[3];
  1221. int32_t h4 = h[4];
  1222. int32_t h5 = h[5];
  1223. int32_t h6 = h[6];
  1224. int32_t h7 = h[7];
  1225. int32_t h8 = h[8];
  1226. int32_t h9 = h[9];
  1227. int32_t q;
  1228. int32_t carry0;
  1229. int32_t carry1;
  1230. int32_t carry2;
  1231. int32_t carry3;
  1232. int32_t carry4;
  1233. int32_t carry5;
  1234. int32_t carry6;
  1235. int32_t carry7;
  1236. int32_t carry8;
  1237. int32_t carry9;
  1238. q = (19 * h9 + (((int32_t) 1) << 24)) >> 25;
  1239. q = (h0 + q) >> 26;
  1240. q = (h1 + q) >> 25;
  1241. q = (h2 + q) >> 26;
  1242. q = (h3 + q) >> 25;
  1243. q = (h4 + q) >> 26;
  1244. q = (h5 + q) >> 25;
  1245. q = (h6 + q) >> 26;
  1246. q = (h7 + q) >> 25;
  1247. q = (h8 + q) >> 26;
  1248. q = (h9 + q) >> 25;
  1249. /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
  1250. h0 += 19 * q;
  1251. /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
  1252. carry0 = h0 >> 26;
  1253. h1 += carry0;
  1254. h0 -= carry0 << 26;
  1255. carry1 = h1 >> 25;
  1256. h2 += carry1;
  1257. h1 -= carry1 << 25;
  1258. carry2 = h2 >> 26;
  1259. h3 += carry2;
  1260. h2 -= carry2 << 26;
  1261. carry3 = h3 >> 25;
  1262. h4 += carry3;
  1263. h3 -= carry3 << 25;
  1264. carry4 = h4 >> 26;
  1265. h5 += carry4;
  1266. h4 -= carry4 << 26;
  1267. carry5 = h5 >> 25;
  1268. h6 += carry5;
  1269. h5 -= carry5 << 25;
  1270. carry6 = h6 >> 26;
  1271. h7 += carry6;
  1272. h6 -= carry6 << 26;
  1273. carry7 = h7 >> 25;
  1274. h8 += carry7;
  1275. h7 -= carry7 << 25;
  1276. carry8 = h8 >> 26;
  1277. h9 += carry8;
  1278. h8 -= carry8 << 26;
  1279. carry9 = h9 >> 25;
  1280. h9 -= carry9 << 25;
  1281. /* h10 = carry9 */
  1282. /*
  1283. Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
  1284. Have h0+...+2^230 h9 between 0 and 2^255-1;
  1285. evidently 2^255 h10-2^255 q = 0.
  1286. Goal: Output h0+...+2^230 h9.
  1287. */
  1288. s[0] = (unsigned char) (h0 >> 0);
  1289. s[1] = (unsigned char) (h0 >> 8);
  1290. s[2] = (unsigned char) (h0 >> 16);
  1291. s[3] = (unsigned char) ((h0 >> 24) | (h1 << 2));
  1292. s[4] = (unsigned char) (h1 >> 6);
  1293. s[5] = (unsigned char) (h1 >> 14);
  1294. s[6] = (unsigned char) ((h1 >> 22) | (h2 << 3));
  1295. s[7] = (unsigned char) (h2 >> 5);
  1296. s[8] = (unsigned char) (h2 >> 13);
  1297. s[9] = (unsigned char) ((h2 >> 21) | (h3 << 5));
  1298. s[10] = (unsigned char) (h3 >> 3);
  1299. s[11] = (unsigned char) (h3 >> 11);
  1300. s[12] = (unsigned char) ((h3 >> 19) | (h4 << 6));
  1301. s[13] = (unsigned char) (h4 >> 2);
  1302. s[14] = (unsigned char) (h4 >> 10);
  1303. s[15] = (unsigned char) (h4 >> 18);
  1304. s[16] = (unsigned char) (h5 >> 0);
  1305. s[17] = (unsigned char) (h5 >> 8);
  1306. s[18] = (unsigned char) (h5 >> 16);
  1307. s[19] = (unsigned char) ((h5 >> 24) | (h6 << 1));
  1308. s[20] = (unsigned char) (h6 >> 7);
  1309. s[21] = (unsigned char) (h6 >> 15);
  1310. s[22] = (unsigned char) ((h6 >> 23) | (h7 << 3));
  1311. s[23] = (unsigned char) (h7 >> 5);
  1312. s[24] = (unsigned char) (h7 >> 13);
  1313. s[25] = (unsigned char) ((h7 >> 21) | (h8 << 4));
  1314. s[26] = (unsigned char) (h8 >> 4);
  1315. s[27] = (unsigned char) (h8 >> 12);
  1316. s[28] = (unsigned char) ((h8 >> 20) | (h9 << 6));
  1317. s[29] = (unsigned char) (h9 >> 2);
  1318. s[30] = (unsigned char) (h9 >> 10);
  1319. s[31] = (unsigned char) (h9 >> 18);
  1320. }