# salsa20-i586-asm_32.S (20 KB)
  1. # salsa20_pm.s version 20051229
  2. # D. J. Bernstein
  3. # Public domain.
  4. #include <linux/linkage.h>
  5. .text
  # enter salsa20_encrypt_bytes
/*
 * salsa20_encrypt_bytes(x, m, out, bytes)
 *
 * Arguments are read from the stack, above the return address:
 *   arg1  x      state block, sixteen 32-bit words (64 bytes are read)
 *   arg2  m      input bytes
 *   arg3  out    output bytes
 *   arg4  bytes  number of bytes to process; 0 returns immediately
 *
 * The state is copied once into j0..j15.  For every 64 bytes, j is copied
 * to the working words x0..x15, the round sequence below is applied, and
 * out[i] = (x[i] + j[i]) ^ m[i] is written word by word.  After each block
 * the 64-bit value held in j8 (low) and j9 (high) is incremented by one; on
 * exit that pair is stored back to words 8 and 9 of the caller's state.
 *
 * A final block shorter than 64 bytes is bounced through the 64-byte tmp
 * buffer in the frame: the input is copied in, a full block is processed
 * in place, and only `bytes` bytes are copied out to the real destination.
 * tmp is not cleared before returning.
 *
 * %ebx, %esi, %edi and %ebp are saved and restored; %ecx, %edx and flags
 * are clobbered.  %eax returns holding the frame size, not a result.
 *
 * NOTE(review): rep movsb is used without touching the direction flag, so
 * this presumably relies on the caller ABI guaranteeing DF = 0 - confirm.
 * NOTE(review): %ebp serves as the round counter, so it is not a valid
 * frame pointer while this function runs.
 *
 * Frame layout, relative to the realigned %esp (256..287 bytes reserved,
 * %esp is a multiple of 32 inside the function):
 */
#define S20_TMP			0(%esp)			/* 64-byte bounce buffer  */
#define S20_X_BACKUP		64(%esp)		/* saved state pointer    */
#define S20_M_BACKUP		68(%esp)		/* saved input pointer    */
#define S20_OUT_BACKUP		72(%esp)		/* saved output pointer   */
#define S20_BYTES_BACKUP	76(%esp)		/* saved remaining count  */
#define S20_EAX_STACK		80(%esp)		/* frame size             */
#define S20_EBX_STACK		84(%esp)		/* caller %ebx            */
#define S20_ESI_STACK		88(%esp)		/* caller %esi            */
#define S20_EDI_STACK		92(%esp)		/* caller %edi            */
#define S20_EBP_STACK		96(%esp)		/* caller %ebp            */
#define S20_X(n)		(100+4*(n))(%esp)	/* working word x<n>      */
#define S20_J(n)		(164+4*(n))(%esp)	/* input-state word j<n>  */
#define S20_CTARGET		228(%esp)		/* real out for short tail */

/*
 * s20_two_rounds: the instruction sequence that the main loop runs twice
 * per pass.  On entry and exit p/s/t/w = x0/x5/x10/x15 live in
 * %eax/%ecx/%edx/%ebx (their frame slots are refreshed at the start of each
 * half); r and v are temporaries in %esi and %edi.
 *
 * The first half updates the word groups (x0,x4,x8,x12), (x5,x9,x13,x1),
 * (x10,x14,x2,x6) and (x15,x3,x7,x11); the second half updates
 * (x0,x1,x2,x3), (x5,x6,x7,x4), (x10,x11,x8,x9) and (x15,x12,x13,x14).
 * Each step is add, rotate left by 7/9/13/18, xor.
 */
.macro s20_two_rounds
	/* ---- first half ---- */
	movl	%eax,S20_X(0)		# x0 = p
	movl	%edx,S20_X(10)		# x10 = t
	addl	S20_X(12),%eax		# p += x12
	movl	%ecx,S20_X(5)		# x5 = s
	addl	S20_X(6),%edx		# t += x6
	movl	%ebx,S20_X(15)		# x15 = w
	movl	S20_X(1),%esi		# r = x1
	add	%ecx,%esi		# r += s
	movl	S20_X(11),%edi		# v = x11
	add	%ebx,%edi		# v += w
	rol	$7,%eax			# p <<<= 7
	xorl	S20_X(4),%eax		# p ^= x4
	rol	$7,%edx			# t <<<= 7
	xorl	S20_X(14),%edx		# t ^= x14
	rol	$7,%esi			# r <<<= 7
	xorl	S20_X(9),%esi		# r ^= x9
	rol	$7,%edi			# v <<<= 7
	xorl	S20_X(3),%edi		# v ^= x3
	movl	%eax,S20_X(4)		# x4 = p
	movl	%edx,S20_X(14)		# x14 = t
	addl	S20_X(0),%eax		# p += x0
	movl	%esi,S20_X(9)		# x9 = r
	addl	S20_X(10),%edx		# t += x10
	movl	%edi,S20_X(3)		# x3 = v
	rol	$9,%eax			# p <<<= 9
	xorl	S20_X(8),%eax		# p ^= x8
	rol	$9,%edx			# t <<<= 9
	xorl	S20_X(2),%edx		# t ^= x2
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	S20_X(13),%ecx		# s ^= x13
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	S20_X(7),%ebx		# w ^= x7
	movl	%eax,S20_X(8)		# x8 = p
	movl	%edx,S20_X(2)		# x2 = t
	addl	S20_X(4),%eax		# p += x4
	movl	%ecx,S20_X(13)		# x13 = s
	addl	S20_X(14),%edx		# t += x14
	movl	%ebx,S20_X(7)		# x7 = w
	rol	$13,%eax		# p <<<= 13
	xorl	S20_X(12),%eax		# p ^= x12
	rol	$13,%edx		# t <<<= 13
	xorl	S20_X(6),%edx		# t ^= x6
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	S20_X(1),%esi		# r ^= x1
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	S20_X(11),%edi		# v ^= x11
	movl	%eax,S20_X(12)		# x12 = p
	movl	%edx,S20_X(6)		# x6 = t
	addl	S20_X(8),%eax		# p += x8
	movl	%esi,S20_X(1)		# x1 = r
	addl	S20_X(2),%edx		# t += x2
	movl	%edi,S20_X(11)		# x11 = v
	rol	$18,%eax		# p <<<= 18
	xorl	S20_X(0),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	S20_X(10),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	S20_X(5),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	S20_X(15),%ebx		# w ^= x15

	/* ---- second half ---- */
	movl	%eax,S20_X(0)		# x0 = p
	movl	%edx,S20_X(10)		# x10 = t
	addl	S20_X(3),%eax		# p += x3
	rol	$7,%eax			# p <<<= 7
	movl	%ecx,S20_X(5)		# x5 = s
	addl	S20_X(9),%edx		# t += x9
	movl	%ebx,S20_X(15)		# x15 = w
	movl	S20_X(4),%esi		# r = x4
	add	%ecx,%esi		# r += s
	movl	S20_X(14),%edi		# v = x14
	add	%ebx,%edi		# v += w
	xorl	S20_X(1),%eax		# p ^= x1
	rol	$7,%edx			# t <<<= 7
	xorl	S20_X(11),%edx		# t ^= x11
	rol	$7,%esi			# r <<<= 7
	xorl	S20_X(6),%esi		# r ^= x6
	rol	$7,%edi			# v <<<= 7
	xorl	S20_X(12),%edi		# v ^= x12
	movl	%eax,S20_X(1)		# x1 = p
	movl	%edx,S20_X(11)		# x11 = t
	addl	S20_X(0),%eax		# p += x0
	movl	%esi,S20_X(6)		# x6 = r
	addl	S20_X(10),%edx		# t += x10
	movl	%edi,S20_X(12)		# x12 = v
	rol	$9,%eax			# p <<<= 9
	xorl	S20_X(2),%eax		# p ^= x2
	rol	$9,%edx			# t <<<= 9
	xorl	S20_X(8),%edx		# t ^= x8
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	S20_X(7),%ecx		# s ^= x7
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	S20_X(13),%ebx		# w ^= x13
	movl	%eax,S20_X(2)		# x2 = p
	movl	%edx,S20_X(8)		# x8 = t
	addl	S20_X(1),%eax		# p += x1
	movl	%ecx,S20_X(7)		# x7 = s
	addl	S20_X(11),%edx		# t += x11
	movl	%ebx,S20_X(13)		# x13 = w
	rol	$13,%eax		# p <<<= 13
	xorl	S20_X(3),%eax		# p ^= x3
	rol	$13,%edx		# t <<<= 13
	xorl	S20_X(9),%edx		# t ^= x9
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	S20_X(4),%esi		# r ^= x4
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	S20_X(14),%edi		# v ^= x14
	movl	%eax,S20_X(3)		# x3 = p
	movl	%edx,S20_X(9)		# x9 = t
	addl	S20_X(2),%eax		# p += x2
	movl	%esi,S20_X(4)		# x4 = r
	addl	S20_X(8),%edx		# t += x8
	movl	%edi,S20_X(14)		# x14 = v
	rol	$18,%eax		# p <<<= 18
	xorl	S20_X(0),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	S20_X(10),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	S20_X(5),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	S20_X(15),%ebx		# w ^= x15
.endm

ENTRY(salsa20_encrypt_bytes)
	/* Reserve 256 + (%esp & 31) bytes; %eax keeps the amount for the exit path. */
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp

	movl	%eax,S20_EAX_STACK	# eax_stack = eax (frame size)
	movl	%ebx,S20_EBX_STACK	# ebx_stack = ebx
	movl	%esi,S20_ESI_STACK	# esi_stack = esi
	movl	%edi,S20_EDI_STACK	# edi_stack = edi
	movl	%ebp,S20_EBP_STACK	# ebp_stack = ebp

	/* %esp + %eax is the entry %esp, so the arguments sit at 4, 8, 12, 16 above it. */
	movl	4(%esp,%eax),%edx	# x = arg1
	movl	8(%esp,%eax),%esi	# m = arg2
	movl	12(%esp,%eax),%edi	# out = arg3
	movl	16(%esp,%eax),%ebx	# bytes = arg4
	sub	$0,%ebx			# bytes -= 0, only to set the flags
	jbe	._done			# nothing to do when bytes is zero

._start:
	/* Snapshot the caller state into j0..j15, rotating through three registers. */
	movl	0(%edx),%eax		# in0 = *(uint32 *) (x + 0)
	movl	4(%edx),%ecx		# in1 = *(uint32 *) (x + 4)
	movl	8(%edx),%ebp		# in2 = *(uint32 *) (x + 8)
	movl	%eax,S20_J(0)		# j0 = in0
	movl	12(%edx),%eax		# in3 = *(uint32 *) (x + 12)
	movl	%ecx,S20_J(1)		# j1 = in1
	movl	16(%edx),%ecx		# in4 = *(uint32 *) (x + 16)
	movl	%ebp,S20_J(2)		# j2 = in2
	movl	20(%edx),%ebp		# in5 = *(uint32 *) (x + 20)
	movl	%eax,S20_J(3)		# j3 = in3
	movl	24(%edx),%eax		# in6 = *(uint32 *) (x + 24)
	movl	%ecx,S20_J(4)		# j4 = in4
	movl	28(%edx),%ecx		# in7 = *(uint32 *) (x + 28)
	movl	%ebp,S20_J(5)		# j5 = in5
	movl	32(%edx),%ebp		# in8 = *(uint32 *) (x + 32)
	movl	%eax,S20_J(6)		# j6 = in6
	movl	36(%edx),%eax		# in9 = *(uint32 *) (x + 36)
	movl	%ecx,S20_J(7)		# j7 = in7
	movl	40(%edx),%ecx		# in10 = *(uint32 *) (x + 40)
	movl	%ebp,S20_J(8)		# j8 = in8
	movl	44(%edx),%ebp		# in11 = *(uint32 *) (x + 44)
	movl	%eax,S20_J(9)		# j9 = in9
	movl	48(%edx),%eax		# in12 = *(uint32 *) (x + 48)
	movl	%ecx,S20_J(10)		# j10 = in10
	movl	52(%edx),%ecx		# in13 = *(uint32 *) (x + 52)
	movl	%ebp,S20_J(11)		# j11 = in11
	movl	56(%edx),%ebp		# in14 = *(uint32 *) (x + 56)
	movl	%eax,S20_J(12)		# j12 = in12
	movl	60(%edx),%eax		# in15 = *(uint32 *) (x + 60)
	movl	%ecx,S20_J(13)		# j13 = in13
	movl	%ebp,S20_J(14)		# j14 = in14
	movl	%eax,S20_J(15)		# j15 = in15
	movl	%edx,S20_X_BACKUP	# x_backup = x

._bytesatleast1:
	cmp	$64,%ebx		# bytes - 64
	jae	._nocopy		# a full block is processed in place

	/* Short tail: stage the input in tmp and work on tmp instead. */
	movl	%edi,S20_CTARGET	# ctarget = out
	leal	S20_TMP,%edi		# out = &tmp
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }
	leal	S20_TMP,%edi		# out = &tmp
	leal	S20_TMP,%esi		# m = &tmp

._nocopy:
	/* Every general register is needed by the rounds, so park the pointers. */
	movl	%edi,S20_OUT_BACKUP	# out_backup = out
	movl	%esi,S20_M_BACKUP	# m_backup = m
	movl	%ebx,S20_BYTES_BACKUP	# bytes_backup = bytes

	/* x[g..g+3] = j[g..g+3], four words at a time. */
	.irp	g,0,4,8,12
	movl	S20_J(\g),%eax
	movl	S20_J(\g+1),%ecx
	movl	S20_J(\g+2),%edx
	movl	S20_J(\g+3),%ebx
	movl	%eax,S20_X(\g)
	movl	%ecx,S20_X(\g+1)
	movl	%edx,S20_X(\g+2)
	movl	%ebx,S20_X(\g+3)
	.endr

	mov	$20,%ebp		# i = 20
	movl	S20_X(0),%eax		# p = x0
	movl	S20_X(5),%ecx		# s = x5
	movl	S20_X(10),%edx		# t = x10
	movl	S20_X(15),%ebx		# w = x15

	/* Five passes: each runs the two-round sequence twice and drops i by 4. */
._mainloop:
	s20_two_rounds
	s20_two_rounds
	sub	$4,%ebp			# i -= 4
	ja	._mainloop		# goto mainloop if unsigned >

	movl	%eax,S20_X(0)		# x0 = p
	movl	%ecx,S20_X(5)		# x5 = s
	movl	%edx,S20_X(10)		# x10 = t
	movl	%ebx,S20_X(15)		# x15 = w

	movl	S20_OUT_BACKUP,%edi	# out = out_backup
	movl	S20_M_BACKUP,%esi	# m = m_backup

	/* out[i] = (x[i] + j[i]) ^ m[i], two words per step. */
	.irp	i,0,2,4,6,8,10,12,14
	movl	S20_X(\i),%eax
	movl	S20_X(\i+1),%ecx
	addl	S20_J(\i),%eax
	addl	S20_J(\i+1),%ecx
	xorl	4*\i(%esi),%eax
	xorl	4*\i+4(%esi),%ecx
	movl	%eax,4*\i(%edi)
	movl	%ecx,4*\i+4(%edi)
	.endr

	movl	S20_BYTES_BACKUP,%ebx	# bytes = bytes_backup

	/* Advance the 64-bit value in j8 (low word) and j9 (high word). */
	movl	S20_J(8),%eax		# in8 = j8
	movl	S20_J(9),%ecx		# in9 = j9
	add	$1,%eax			# in8 += 1
	adc	$0,%ecx			# in9 += 0 + carry
	movl	%eax,S20_J(8)		# j8 = in8
	movl	%ecx,S20_J(9)		# j9 = in9

	cmp	$64,%ebx		# bytes - 64
	ja	._bytesatleast65	# more than one block was left: keep going
	jae	._bytesatleast64	# exactly one block was left: finished

	/* Short tail: hand the first `bytes` bytes of tmp to the real output. */
	mov	%edi,%esi		# m = out
	movl	S20_CTARGET,%edi	# out = ctarget
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }

._bytesatleast64:
	/* Publish the advanced j8/j9 pair into the caller state. */
	movl	S20_X_BACKUP,%eax	# x = x_backup
	movl	S20_J(8),%ecx		# in8 = j8
	movl	S20_J(9),%edx		# in9 = j9
	movl	%ecx,32(%eax)		# *(uint32 *) (x + 32) = in8
	movl	%edx,36(%eax)		# *(uint32 *) (x + 36) = in9

._done:
	movl	S20_EAX_STACK,%eax	# eax = eax_stack (frame size)
	movl	S20_EBX_STACK,%ebx	# ebx = ebx_stack
	movl	S20_ESI_STACK,%esi	# esi = esi_stack
	movl	S20_EDI_STACK,%edi	# edi = edi_stack
	movl	S20_EBP_STACK,%ebp	# ebp = ebp_stack
	add	%eax,%esp		# leave
	ret

._bytesatleast65:
	sub	$64,%ebx		# bytes -= 64
	add	$64,%edi		# out += 64
	add	$64,%esi		# m += 64
	jmp	._bytesatleast1		# goto bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
  # enter salsa20_keysetup
/*
 * salsa20_keysetup(x, k, kbits) - load a key into a Salsa20 state block.
 *
 * Arguments are read from the stack, above the return address:
 *   arg1  x      state block, sixteen 32-bit words
 *   arg2  k      key; 16 bytes are read when kbits < 256, 32 bytes otherwise
 *   arg3  kbits  key size in bits, compared unsigned against 256
 *
 * Words written (word n lives at byte offset 4*n of x):
 *   x[1..4]   = k[0..3]
 *   x[11..14] = k[4..7] when kbits >= 256, k[0..3] again otherwise
 *   x[0], x[5], x[10], x[15] = four constants; the middle two depend on
 *                              the key size.  Read as little-endian ASCII
 *                              they spell "expand 32-byte k" and
 *                              "expand 16-byte k" respectively.
 * Words 6..9 are left untouched.
 *
 * Within each phase all key words are loaded before anything is stored,
 * and the short-key path re-reads k[0..3] from memory after x[1..4] has
 * been written.
 *
 * %ebx, %esi and %edi are preserved with push/pop and %ebp is never
 * touched, so it stays usable as a frame pointer.  %eax, %ecx, %edx and
 * the flags are clobbered; no meaningful value is returned.
 */
ENTRY(salsa20_keysetup)
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	/* Three pushes plus the return address put arg1 at 16(%esp). */
	movl	16(%esp),%eax		# x = arg1
	movl	20(%esp),%ecx		# k = arg2

	/* x[1..4] = k[0..3] */
	movl	0(%ecx),%edx		# in1 = *(uint32 *) (k + 0)
	movl	4(%ecx),%ebx		# in2 = *(uint32 *) (k + 4)
	movl	8(%ecx),%esi		# in3 = *(uint32 *) (k + 8)
	movl	12(%ecx),%edi		# in4 = *(uint32 *) (k + 12)
	movl	%edx,4(%eax)		# *(uint32 *) (x + 4) = in1
	movl	%ebx,8(%eax)		# *(uint32 *) (x + 8) = in2
	movl	%esi,12(%eax)		# *(uint32 *) (x + 12) = in3
	movl	%edi,16(%eax)		# *(uint32 *) (x + 16) = in4

	cmpl	$256,24(%esp)		# kbits - 256 (kbits = arg3)
	jb	._kbits128		# goto kbits128 if unsigned<

	/* Long key: second half of the key, 32-byte flavour of the constants. */
	movl	16(%ecx),%edx		# in11 = *(uint32 *) (k + 16)
	movl	20(%ecx),%ebx		# in12 = *(uint32 *) (k + 20)
	movl	24(%ecx),%esi		# in13 = *(uint32 *) (k + 24)
	movl	28(%ecx),%edi		# in14 = *(uint32 *) (k + 28)
	movl	$857760878,20(%eax)	# x[5]  = 0x3320646e
	movl	$2036477234,40(%eax)	# x[10] = 0x79622d32
	jmp	._keysetupdone

._kbits128:
	/* Short key: the same four key words again, 16-byte flavour of the constants. */
	movl	0(%ecx),%edx		# in11 = *(uint32 *) (k + 0)
	movl	4(%ecx),%ebx		# in12 = *(uint32 *) (k + 4)
	movl	8(%ecx),%esi		# in13 = *(uint32 *) (k + 8)
	movl	12(%ecx),%edi		# in14 = *(uint32 *) (k + 12)
	movl	$824206446,20(%eax)	# x[5]  = 0x3120646e
	movl	$2036477238,40(%eax)	# x[10] = 0x79622d36

._keysetupdone:
	/* Common tail: x[11..14] and the two constants shared by both key sizes. */
	movl	%edx,44(%eax)		# *(uint32 *) (x + 44) = in11
	movl	%ebx,48(%eax)		# *(uint32 *) (x + 48) = in12
	movl	%esi,52(%eax)		# *(uint32 *) (x + 52) = in13
	movl	%edi,56(%eax)		# *(uint32 *) (x + 56) = in14
	movl	$1634760805,0(%eax)	# x[0]  = 0x61707865
	movl	$1797285236,60(%eax)	# x[15] = 0x6b206574

	popl	%edi
	popl	%esi
	popl	%ebx
	ret
ENDPROC(salsa20_keysetup)
  # enter salsa20_ivsetup
/*
 * salsa20_ivsetup(x, iv) - load an 8-byte IV and reset the block counter.
 *
 * Arguments are read from the stack, above the return address:
 *   arg1  x   state block of 32-bit words; byte offsets 24..39 are written
 *   arg2  iv  two 32-bit words (8 bytes are read)
 *
 * Words written (word n lives at byte offset 4*n of x):
 *   x[6] = iv[0], x[7] = iv[1], x[8] = 0, x[9] = 0
 * Both IV words are loaded before anything is stored.
 *
 * Frameless leaf: only %eax, %ecx and %edx are used, so no register needs
 * saving and no stack is consumed.  Flags are left untouched; no
 * meaningful value is returned.
 */
ENTRY(salsa20_ivsetup)
	movl	8(%esp),%ecx		# iv = arg2
	movl	4(%esp),%eax		# x = arg1
	movl	0(%ecx),%edx		# in6 = *(uint32 *) (iv + 0)
	movl	4(%ecx),%ecx		# in7 = *(uint32 *) (iv + 4)
	movl	%edx,24(%eax)		# *(uint32 *) (x + 24) = in6
	movl	%ecx,28(%eax)		# *(uint32 *) (x + 28) = in7
	movl	$0,32(%eax)		# *(uint32 *) (x + 32) = 0
	movl	$0,36(%eax)		# *(uint32 *) (x + 36) = 0
	ret
ENDPROC(salsa20_ivsetup)