# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.
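#
# (Note added for reference; not part of the original source. The
# per-instruction comments below spell out the Salsa20 quarter-round
# arithmetic. A minimal C sketch of one quarter-round, using the
# illustrative names ROTL32 and quarterround:
#
#   #include <stdint.h>
#   #define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))
#
#   /* One Salsa20 quarter-round: updates b, c, d, a in sequence. */
#   static void quarterround(uint32_t *a, uint32_t *b,
#                            uint32_t *c, uint32_t *d)
#   {
#       *b ^= ROTL32(*a + *d, 7);
#       *c ^= ROTL32(*b + *a, 9);
#       *d ^= ROTL32(*c + *b, 13);
#       *a ^= ROTL32(*d + *c, 18);
#   }
#
# In the main loop, the live registers p, s, t, w carry the diagonal
# words x0, x5, x10, x15, each acting as "a" in one of four
# interleaved quarter-rounds.)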
# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
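# (Added note: %eax now holds 256 + (%esp mod 32), so this subtraction
# yields a 32-byte-aligned stack frame of at least 256 bytes; the same
# amount is saved as eax_stack below and added back to %esp on return.)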
# eax_stack = eax
movl %eax,80(%esp)
# ebx_stack = ebx
movl %ebx,84(%esp)
# esi_stack = esi
movl %esi,88(%esp)
# edi_stack = edi
movl %edi,92(%esp)
# ebp_stack = ebp
movl %ebp,96(%esp)
# x = arg1
movl 4(%esp,%eax),%edx
# m = arg2
movl 8(%esp,%eax),%esi
# out = arg3
movl 12(%esp,%eax),%edi
# bytes = arg4
movl 16(%esp,%eax),%ebx
# bytes -= 0
sub $0,%ebx
# goto done if unsigned<=
jbe ._done
._start:
# in0 = *(uint32 *) (x + 0)
movl 0(%edx),%eax
# in1 = *(uint32 *) (x + 4)
movl 4(%edx),%ecx
# in2 = *(uint32 *) (x + 8)
movl 8(%edx),%ebp
# j0 = in0
movl %eax,164(%esp)
# in3 = *(uint32 *) (x + 12)
movl 12(%edx),%eax
# j1 = in1
movl %ecx,168(%esp)
# in4 = *(uint32 *) (x + 16)
movl 16(%edx),%ecx
# j2 = in2
movl %ebp,172(%esp)
# in5 = *(uint32 *) (x + 20)
movl 20(%edx),%ebp
# j3 = in3
movl %eax,176(%esp)
# in6 = *(uint32 *) (x + 24)
movl 24(%edx),%eax
# j4 = in4
movl %ecx,180(%esp)
# in7 = *(uint32 *) (x + 28)
movl 28(%edx),%ecx
# j5 = in5
movl %ebp,184(%esp)
# in8 = *(uint32 *) (x + 32)
movl 32(%edx),%ebp
# j6 = in6
movl %eax,188(%esp)
# in9 = *(uint32 *) (x + 36)
movl 36(%edx),%eax
# j7 = in7
movl %ecx,192(%esp)
# in10 = *(uint32 *) (x + 40)
movl 40(%edx),%ecx
# j8 = in8
movl %ebp,196(%esp)
# in11 = *(uint32 *) (x + 44)
movl 44(%edx),%ebp
# j9 = in9
movl %eax,200(%esp)
# in12 = *(uint32 *) (x + 48)
movl 48(%edx),%eax
# j10 = in10
movl %ecx,204(%esp)
# in13 = *(uint32 *) (x + 52)
movl 52(%edx),%ecx
# j11 = in11
movl %ebp,208(%esp)
# in14 = *(uint32 *) (x + 56)
movl 56(%edx),%ebp
# j12 = in12
movl %eax,212(%esp)
# in15 = *(uint32 *) (x + 60)
movl 60(%edx),%eax
# j13 = in13
movl %ecx,216(%esp)
# j14 = in14
movl %ebp,220(%esp)
# j15 = in15
movl %eax,224(%esp)
# x_backup = x
movl %edx,64(%esp)
._bytesatleast1:
# bytes - 64
cmp $64,%ebx
# goto nocopy if unsigned>=
jae ._nocopy
# ctarget = out
movl %edi,228(%esp)
# out = &tmp
leal 0(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leal 0(%esp),%edi
# m = &tmp
leal 0(%esp),%esi
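# (Added note: a partial block is staged through the 64-byte scratch
# area at the bottom of the frame, so the block code always reads and
# writes a full 64 bytes; the real output pointer is kept in ctarget
# and the tail is copied back after the block is generated.)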
._nocopy:
# out_backup = out
movl %edi,72(%esp)
# m_backup = m
movl %esi,68(%esp)
# bytes_backup = bytes
movl %ebx,76(%esp)
# in0 = j0
movl 164(%esp),%eax
# in1 = j1
movl 168(%esp),%ecx
# in2 = j2
movl 172(%esp),%edx
# in3 = j3
movl 176(%esp),%ebx
# x0 = in0
movl %eax,100(%esp)
# x1 = in1
movl %ecx,104(%esp)
# x2 = in2
movl %edx,108(%esp)
# x3 = in3
movl %ebx,112(%esp)
# in4 = j4
movl 180(%esp),%eax
# in5 = j5
movl 184(%esp),%ecx
# in6 = j6
movl 188(%esp),%edx
# in7 = j7
movl 192(%esp),%ebx
# x4 = in4
movl %eax,116(%esp)
# x5 = in5
movl %ecx,120(%esp)
# x6 = in6
movl %edx,124(%esp)
# x7 = in7
movl %ebx,128(%esp)
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in10 = j10
movl 204(%esp),%edx
# in11 = j11
movl 208(%esp),%ebx
# x8 = in8
movl %eax,132(%esp)
# x9 = in9
movl %ecx,136(%esp)
# x10 = in10
movl %edx,140(%esp)
# x11 = in11
movl %ebx,144(%esp)
# in12 = j12
movl 212(%esp),%eax
# in13 = j13
movl 216(%esp),%ecx
# in14 = j14
movl 220(%esp),%edx
# in15 = j15
movl 224(%esp),%ebx
# x12 = in12
movl %eax,148(%esp)
# x13 = in13
movl %ecx,152(%esp)
# x14 = in14
movl %edx,156(%esp)
# x15 = in15
movl %ebx,160(%esp)
# i = 20
mov $20,%ebp
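# (Added note: each pass through the loop below performs four Salsa20
# rounds (a column round and a row round, unrolled twice), and i is
# decremented by 4, so the body executes five times for 20 rounds.)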
# p = x0
movl 100(%esp),%eax
# s = x5
movl 120(%esp),%ecx
# t = x10
movl 140(%esp),%edx
# w = x15
movl 160(%esp),%ebx
._mainloop:
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x12
addl 148(%esp),%eax
# x5 = s
movl %ecx,120(%esp)
# t += x6
addl 124(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x1
movl 104(%esp),%esi
# r += s
add %ecx,%esi
# v = x11
movl 144(%esp),%edi
# v += w
add %ebx,%edi
# p <<<= 7
rol $7,%eax
# p ^= x4
xorl 116(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x14
xorl 156(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x9
xorl 136(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x3
xorl 112(%esp),%edi
# x4 = p
movl %eax,116(%esp)
# x14 = t
movl %edx,156(%esp)
# p += x0
addl 100(%esp),%eax
# x9 = r
movl %esi,136(%esp)
# t += x10
addl 140(%esp),%edx
# x3 = v
movl %edi,112(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x8
xorl 132(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x2
xorl 108(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x13
xorl 152(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x7
xorl 128(%esp),%ebx
# x8 = p
movl %eax,132(%esp)
# x2 = t
movl %edx,108(%esp)
# p += x4
addl 116(%esp),%eax
# x13 = s
movl %ecx,152(%esp)
# t += x14
addl 156(%esp),%edx
# x7 = w
movl %ebx,128(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x12
xorl 148(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x6
xorl 124(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x1
xorl 104(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x11
xorl 144(%esp),%edi
# x12 = p
movl %eax,148(%esp)
# x6 = t
movl %edx,124(%esp)
# p += x8
addl 132(%esp),%eax
# x1 = r
movl %esi,104(%esp)
# t += x2
addl 108(%esp),%edx
# x11 = v
movl %edi,144(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# x0 = p
movl %eax,100(%esp)
# x10 = t
movl %edx,140(%esp)
# p += x3
addl 112(%esp),%eax
# p <<<= 7
rol $7,%eax
# x5 = s
movl %ecx,120(%esp)
# t += x9
addl 136(%esp),%edx
# x15 = w
movl %ebx,160(%esp)
# r = x4
movl 116(%esp),%esi
# r += s
add %ecx,%esi
# v = x14
movl 156(%esp),%edi
# v += w
add %ebx,%edi
# p ^= x1
xorl 104(%esp),%eax
# t <<<= 7
rol $7,%edx
# t ^= x11
xorl 144(%esp),%edx
# r <<<= 7
rol $7,%esi
# r ^= x6
xorl 124(%esp),%esi
# v <<<= 7
rol $7,%edi
# v ^= x12
xorl 148(%esp),%edi
# x1 = p
movl %eax,104(%esp)
# x11 = t
movl %edx,144(%esp)
# p += x0
addl 100(%esp),%eax
# x6 = r
movl %esi,124(%esp)
# t += x10
addl 140(%esp),%edx
# x12 = v
movl %edi,148(%esp)
# p <<<= 9
rol $9,%eax
# p ^= x2
xorl 108(%esp),%eax
# t <<<= 9
rol $9,%edx
# t ^= x8
xorl 132(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 9
rol $9,%ecx
# s ^= x7
xorl 128(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 9
rol $9,%ebx
# w ^= x13
xorl 152(%esp),%ebx
# x2 = p
movl %eax,108(%esp)
# x8 = t
movl %edx,132(%esp)
# p += x1
addl 104(%esp),%eax
# x7 = s
movl %ecx,128(%esp)
# t += x11
addl 144(%esp),%edx
# x13 = w
movl %ebx,152(%esp)
# p <<<= 13
rol $13,%eax
# p ^= x3
xorl 112(%esp),%eax
# t <<<= 13
rol $13,%edx
# t ^= x9
xorl 136(%esp),%edx
# r += s
add %ecx,%esi
# r <<<= 13
rol $13,%esi
# r ^= x4
xorl 116(%esp),%esi
# v += w
add %ebx,%edi
# v <<<= 13
rol $13,%edi
# v ^= x14
xorl 156(%esp),%edi
# x3 = p
movl %eax,112(%esp)
# x9 = t
movl %edx,136(%esp)
# p += x2
addl 108(%esp),%eax
# x4 = r
movl %esi,116(%esp)
# t += x8
addl 132(%esp),%edx
# x14 = v
movl %edi,156(%esp)
# p <<<= 18
rol $18,%eax
# p ^= x0
xorl 100(%esp),%eax
# t <<<= 18
rol $18,%edx
# t ^= x10
xorl 140(%esp),%edx
# s += r
add %esi,%ecx
# s <<<= 18
rol $18,%ecx
# s ^= x5
xorl 120(%esp),%ecx
# w += v
add %edi,%ebx
# w <<<= 18
rol $18,%ebx
# w ^= x15
xorl 160(%esp),%ebx
# i -= 4
sub $4,%ebp
# goto mainloop if unsigned >
ja ._mainloop
# x0 = p
movl %eax,100(%esp)
# x5 = s
movl %ecx,120(%esp)
# x10 = t
movl %edx,140(%esp)
# x15 = w
movl %ebx,160(%esp)
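# (Added note: the code below applies the Salsa20 feedforward, adding
# the saved input words j0..j15 back into the state, then XORs the
# 64-byte keystream block with the message and stores the result.)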
# out = out_backup
movl 72(%esp),%edi
# m = m_backup
movl 68(%esp),%esi
# in0 = x0
movl 100(%esp),%eax
# in1 = x1
movl 104(%esp),%ecx
# in0 += j0
addl 164(%esp),%eax
# in1 += j1
addl 168(%esp),%ecx
# in0 ^= *(uint32 *) (m + 0)
xorl 0(%esi),%eax
# in1 ^= *(uint32 *) (m + 4)
xorl 4(%esi),%ecx
# *(uint32 *) (out + 0) = in0
movl %eax,0(%edi)
# *(uint32 *) (out + 4) = in1
movl %ecx,4(%edi)
# in2 = x2
movl 108(%esp),%eax
# in3 = x3
movl 112(%esp),%ecx
# in2 += j2
addl 172(%esp),%eax
# in3 += j3
addl 176(%esp),%ecx
# in2 ^= *(uint32 *) (m + 8)
xorl 8(%esi),%eax
# in3 ^= *(uint32 *) (m + 12)
xorl 12(%esi),%ecx
# *(uint32 *) (out + 8) = in2
movl %eax,8(%edi)
# *(uint32 *) (out + 12) = in3
movl %ecx,12(%edi)
# in4 = x4
movl 116(%esp),%eax
# in5 = x5
movl 120(%esp),%ecx
# in4 += j4
addl 180(%esp),%eax
# in5 += j5
addl 184(%esp),%ecx
# in4 ^= *(uint32 *) (m + 16)
xorl 16(%esi),%eax
# in5 ^= *(uint32 *) (m + 20)
xorl 20(%esi),%ecx
# *(uint32 *) (out + 16) = in4
movl %eax,16(%edi)
# *(uint32 *) (out + 20) = in5
movl %ecx,20(%edi)
# in6 = x6
movl 124(%esp),%eax
# in7 = x7
movl 128(%esp),%ecx
# in6 += j6
addl 188(%esp),%eax
# in7 += j7
addl 192(%esp),%ecx
# in6 ^= *(uint32 *) (m + 24)
xorl 24(%esi),%eax
# in7 ^= *(uint32 *) (m + 28)
xorl 28(%esi),%ecx
# *(uint32 *) (out + 24) = in6
movl %eax,24(%edi)
# *(uint32 *) (out + 28) = in7
movl %ecx,28(%edi)
# in8 = x8
movl 132(%esp),%eax
# in9 = x9
movl 136(%esp),%ecx
# in8 += j8
addl 196(%esp),%eax
# in9 += j9
addl 200(%esp),%ecx
# in8 ^= *(uint32 *) (m + 32)
xorl 32(%esi),%eax
# in9 ^= *(uint32 *) (m + 36)
xorl 36(%esi),%ecx
# *(uint32 *) (out + 32) = in8
movl %eax,32(%edi)
# *(uint32 *) (out + 36) = in9
movl %ecx,36(%edi)
# in10 = x10
movl 140(%esp),%eax
# in11 = x11
movl 144(%esp),%ecx
# in10 += j10
addl 204(%esp),%eax
# in11 += j11
addl 208(%esp),%ecx
# in10 ^= *(uint32 *) (m + 40)
xorl 40(%esi),%eax
# in11 ^= *(uint32 *) (m + 44)
xorl 44(%esi),%ecx
# *(uint32 *) (out + 40) = in10
movl %eax,40(%edi)
# *(uint32 *) (out + 44) = in11
movl %ecx,44(%edi)
# in12 = x12
movl 148(%esp),%eax
# in13 = x13
movl 152(%esp),%ecx
# in12 += j12
addl 212(%esp),%eax
# in13 += j13
addl 216(%esp),%ecx
# in12 ^= *(uint32 *) (m + 48)
xorl 48(%esi),%eax
# in13 ^= *(uint32 *) (m + 52)
xorl 52(%esi),%ecx
# *(uint32 *) (out + 48) = in12
movl %eax,48(%edi)
# *(uint32 *) (out + 52) = in13
movl %ecx,52(%edi)
# in14 = x14
movl 156(%esp),%eax
# in15 = x15
movl 160(%esp),%ecx
# in14 += j14
addl 220(%esp),%eax
# in15 += j15
addl 224(%esp),%ecx
# in14 ^= *(uint32 *) (m + 56)
xorl 56(%esi),%eax
# in15 ^= *(uint32 *) (m + 60)
xorl 60(%esi),%ecx
# *(uint32 *) (out + 56) = in14
movl %eax,56(%edi)
# *(uint32 *) (out + 60) = in15
movl %ecx,60(%edi)
# bytes = bytes_backup
movl 76(%esp),%ebx
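# (Added note: the add/adc pair below increments the 64-bit block
# counter held in state words 8 and 9 once per 64-byte block.)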
# in8 = j8
movl 196(%esp),%eax
# in9 = j9
movl 200(%esp),%ecx
# in8 += 1
add $1,%eax
# in9 += 0 + carry
adc $0,%ecx
# j8 = in8
movl %eax,196(%esp)
# j9 = in9
movl %ecx,200(%esp)
# bytes - 64
cmp $64,%ebx
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# goto bytesatleast64 if unsigned>=
jae ._bytesatleast64
# m = out
mov %edi,%esi
# out = ctarget
movl 228(%esp),%edi
# i = bytes
mov %ebx,%ecx
# while (i) { *out++ = *m++; --i }
rep movsb
._bytesatleast64:
# x = x_backup
movl 64(%esp),%eax
# in8 = j8
movl 196(%esp),%ecx
# in9 = j9
movl 200(%esp),%edx
# *(uint32 *) (x + 32) = in8
movl %ecx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %edx,36(%eax)
._done:
# eax = eax_stack
movl 80(%esp),%eax
# ebx = ebx_stack
movl 84(%esp),%ebx
# esi = esi_stack
movl 88(%esp),%esi
# edi = edi_stack
movl 92(%esp),%edi
# ebp = ebp_stack
movl 96(%esp),%ebp
# leave
add %eax,%esp
ret
._bytesatleast65:
# bytes -= 64
sub $64,%ebx
# out += 64
add $64,%edi
# m += 64
add $64,%esi
# goto bytesatleast1
jmp ._bytesatleast1
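# (Added note: ECRYPT_keysetup places the first 16 key bytes in state
# words 1..4. For a 256-bit key the second 16 bytes fill words 11..14;
# for a 128-bit key the same 16 bytes are reused there. Words 0, 5,
# 10 and 15 receive the diagonal constants loaded further down.)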
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,64(%esp)
# ebx_stack = ebx
movl %ebx,68(%esp)
# esi_stack = esi
movl %esi,72(%esp)
# edi_stack = edi
movl %edi,76(%esp)
# ebp_stack = ebp
movl %ebp,80(%esp)
# k = arg2
movl 8(%esp,%eax),%ecx
# kbits = arg3
movl 12(%esp,%eax),%edx
# x = arg1
movl 4(%esp,%eax),%eax
# in1 = *(uint32 *) (k + 0)
movl 0(%ecx),%ebx
# in2 = *(uint32 *) (k + 4)
movl 4(%ecx),%esi
# in3 = *(uint32 *) (k + 8)
movl 8(%ecx),%edi
# in4 = *(uint32 *) (k + 12)
movl 12(%ecx),%ebp
# *(uint32 *) (x + 4) = in1
movl %ebx,4(%eax)
# *(uint32 *) (x + 8) = in2
movl %esi,8(%eax)
# *(uint32 *) (x + 12) = in3
movl %edi,12(%eax)
# *(uint32 *) (x + 16) = in4
movl %ebp,16(%eax)
# kbits - 256
cmp $256,%edx
# goto kbits128 if unsigned<
jb ._kbits128
._kbits256:
# in11 = *(uint32 *) (k + 16)
movl 16(%ecx),%edx
# in12 = *(uint32 *) (k + 20)
movl 20(%ecx),%ebx
# in13 = *(uint32 *) (k + 24)
movl 24(%ecx),%esi
# in14 = *(uint32 *) (k + 28)
movl 28(%ecx),%ecx
# *(uint32 *) (x + 44) = in11
movl %edx,44(%eax)
# *(uint32 *) (x + 48) = in12
movl %ebx,48(%eax)
# *(uint32 *) (x + 52) = in13
movl %esi,52(%eax)
# *(uint32 *) (x + 56) = in14
movl %ecx,56(%eax)
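# (Added note: the four constants below are the ASCII string
# "expand 32-byte k" as little-endian words "expa", "nd 3", "2-by",
# "te k", stored in the diagonal words 0, 5, 10, 15.)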
# in0 = 1634760805
mov $1634760805,%ecx
# in5 = 857760878
mov $857760878,%edx
# in10 = 2036477234
mov $2036477234,%ebx
# in15 = 1797285236
mov $1797285236,%esi
# *(uint32 *) (x + 0) = in0
movl %ecx,0(%eax)
# *(uint32 *) (x + 20) = in5
movl %edx,20(%eax)
# *(uint32 *) (x + 40) = in10
movl %ebx,40(%eax)
# *(uint32 *) (x + 60) = in15
movl %esi,60(%eax)
# goto keysetupdone
jmp ._keysetupdone
._kbits128:
# in11 = *(uint32 *) (k + 0)
movl 0(%ecx),%edx
# in12 = *(uint32 *) (k + 4)
movl 4(%ecx),%ebx
# in13 = *(uint32 *) (k + 8)
movl 8(%ecx),%esi
# in14 = *(uint32 *) (k + 12)
movl 12(%ecx),%ecx
# *(uint32 *) (x + 44) = in11
movl %edx,44(%eax)
# *(uint32 *) (x + 48) = in12
movl %ebx,48(%eax)
# *(uint32 *) (x + 52) = in13
movl %esi,52(%eax)
# *(uint32 *) (x + 56) = in14
movl %ecx,56(%eax)
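# (Added note: same layout as the 256-bit case, but the constants
# below spell the ASCII string "expand 16-byte k": "expa", "nd 1",
# "6-by", "te k".)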
# in0 = 1634760805
mov $1634760805,%ecx
# in5 = 824206446
mov $824206446,%edx
# in10 = 2036477238
mov $2036477238,%ebx
# in15 = 1797285236
mov $1797285236,%esi
# *(uint32 *) (x + 0) = in0
movl %ecx,0(%eax)
# *(uint32 *) (x + 20) = in5
movl %edx,20(%eax)
# *(uint32 *) (x + 40) = in10
movl %ebx,40(%eax)
# *(uint32 *) (x + 60) = in15
movl %esi,60(%eax)
._keysetupdone:
# eax = eax_stack
movl 64(%esp),%eax
# ebx = ebx_stack
movl 68(%esp),%ebx
# esi = esi_stack
movl 72(%esp),%esi
# edi = edi_stack
movl 76(%esp),%edi
# ebp = ebp_stack
movl 80(%esp),%ebp
# leave
add %eax,%esp
ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
mov %esp,%eax
and $31,%eax
add $256,%eax
sub %eax,%esp
# eax_stack = eax
movl %eax,64(%esp)
# ebx_stack = ebx
movl %ebx,68(%esp)
# esi_stack = esi
movl %esi,72(%esp)
# edi_stack = edi
movl %edi,76(%esp)
# ebp_stack = ebp
movl %ebp,80(%esp)
# iv = arg2
movl 8(%esp,%eax),%ecx
# x = arg1
movl 4(%esp,%eax),%eax
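# (Added note: the 8-byte IV fills state words 6 and 7, and the 64-bit
# block counter in words 8 and 9 is reset to zero.)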
# in6 = *(uint32 *) (iv + 0)
movl 0(%ecx),%edx
# in7 = *(uint32 *) (iv + 4)
movl 4(%ecx),%ecx
# in8 = 0
mov $0,%ebx
# in9 = 0
mov $0,%esi
# *(uint32 *) (x + 24) = in6
movl %edx,24(%eax)
# *(uint32 *) (x + 28) = in7
movl %ecx,28(%eax)
# *(uint32 *) (x + 32) = in8
movl %ebx,32(%eax)
# *(uint32 *) (x + 36) = in9
movl %esi,36(%eax)
# eax = eax_stack
movl 64(%esp),%eax
# ebx = ebx_stack
movl 68(%esp),%ebx
# esi = esi_stack
movl 72(%esp),%esi
# edi = edi_stack
movl 76(%esp),%edi
# ebp = ebp_stack
movl 80(%esp),%ebp
# leave
add %eax,%esp
ret