# salsa20-x86_64-asm_64.S
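# Salsa20 stream cipher, x86_64 assembly, exposing the ECRYPT (eSTREAM) API:
#   ECRYPT_keysetup(x, k, kbits)           - load key and constants into state x
#   ECRYPT_ivsetup(x, iv)                  - load nonce, reset the block counter
#   ECRYPT_encrypt_bytes(x, m, out, bytes) - xor `bytes` bytes of Salsa20/20
#                                            keystream from state x into m -> out
# The interleaved # comments record the higher-level operation each
# instruction implements.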

# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
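# The prologue above reserves at least 256 bytes of 32-byte-aligned scratch
# space below the caller's stack pointer; %r11 holds the adjustment and is
# saved into the scratch area below so the epilogue can restore %rsp.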
# x = arg1
mov %rdi,%r8
# m = arg2
mov %rsi,%rsi
# out = arg3
mov %rdx,%rdi
# bytes = arg4
mov %rcx,%rdx
# unsigned>? bytes - 0
cmp $0,%rdx
# comment:fp stack unchanged by jump
# goto done if !unsigned>
jbe ._done
# comment:fp stack unchanged by fallthrough
# start:
._start:
# r11_stack = r11
movq %r11,0(%rsp)
# r12_stack = r12
movq %r12,8(%rsp)
# r13_stack = r13
movq %r13,16(%rsp)
# r14_stack = r14
movq %r14,24(%rsp)
# r15_stack = r15
movq %r15,32(%rsp)
# rbx_stack = rbx
movq %rbx,40(%rsp)
# rbp_stack = rbp
movq %rbp,48(%rsp)
# in0 = *(uint64 *) (x + 0)
movq 0(%r8),%rcx
# in2 = *(uint64 *) (x + 8)
movq 8(%r8),%r9
# in4 = *(uint64 *) (x + 16)
movq 16(%r8),%rax
# in6 = *(uint64 *) (x + 24)
movq 24(%r8),%r10
# in8 = *(uint64 *) (x + 32)
movq 32(%r8),%r11
# in10 = *(uint64 *) (x + 40)
movq 40(%r8),%r12
# in12 = *(uint64 *) (x + 48)
movq 48(%r8),%r13
# in14 = *(uint64 *) (x + 56)
movq 56(%r8),%r14
# j0 = in0
movq %rcx,56(%rsp)
# j2 = in2
movq %r9,64(%rsp)
# j4 = in4
movq %rax,72(%rsp)
# j6 = in6
movq %r10,80(%rsp)
# j8 = in8
movq %r11,88(%rsp)
# j10 = in10
movq %r12,96(%rsp)
# j12 = in12
movq %r13,104(%rsp)
# j14 = in14
movq %r14,112(%rsp)
# x_backup = x
movq %r8,120(%rsp)
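# The 16-word (64-byte) Salsa20 state at x has now been loaded as eight
# 64-bit pairs and mirrored on the stack as j0..j14, so the original state
# can be re-added after the rounds and carried across blocks.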
# bytesatleast1:
._bytesatleast1:
# unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto nocopy if !unsigned<
jae ._nocopy
# ctarget = out
movq %rdi,128(%rsp)
# out = &tmp
leaq 192(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
# out = &tmp
leaq 192(%rsp),%rdi
# m = &tmp
leaq 192(%rsp),%rsi
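# When fewer than 64 bytes remain, the tail of the message is copied into a
# 64-byte tmp buffer on the stack and both m and out are redirected to it;
# the real destination stays in ctarget and the partial result is copied
# back once the block has been processed.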
# comment:fp stack unchanged by fallthrough
# nocopy:
._nocopy:
# out_backup = out
movq %rdi,136(%rsp)
# m_backup = m
movq %rsi,144(%rsp)
# bytes_backup = bytes
movq %rdx,152(%rsp)
# x1 = j0
movq 56(%rsp),%rdi
# x0 = x1
mov %rdi,%rdx
# (uint64) x1 >>= 32
shr $32,%rdi
# x3 = j2
movq 64(%rsp),%rsi
# x2 = x3
mov %rsi,%rcx
# (uint64) x3 >>= 32
shr $32,%rsi
# x5 = j4
movq 72(%rsp),%r8
# x4 = x5
mov %r8,%r9
# (uint64) x5 >>= 32
shr $32,%r8
# x5_stack = x5
movq %r8,160(%rsp)
# x7 = j6
movq 80(%rsp),%r8
# x6 = x7
mov %r8,%rax
# (uint64) x7 >>= 32
shr $32,%r8
# x9 = j8
movq 88(%rsp),%r10
# x8 = x9
mov %r10,%r11
# (uint64) x9 >>= 32
shr $32,%r10
# x11 = j10
movq 96(%rsp),%r12
# x10 = x11
mov %r12,%r13
# x10_stack = x10
movq %r13,168(%rsp)
# (uint64) x11 >>= 32
shr $32,%r12
# x13 = j12
movq 104(%rsp),%r13
# x12 = x13
mov %r13,%r14
# (uint64) x13 >>= 32
shr $32,%r13
# x15 = j14
movq 112(%rsp),%r15
# x14 = x15
mov %r15,%rbx
# (uint64) x15 >>= 32
shr $32,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = 20
mov $20,%r15
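# Each pass through ._mainloop performs four of the 20 Salsa20 rounds (a
# column round and a row round, twice), so i = 20 with "sub $4" per
# iteration gives the full Salsa20/20. Every quarter-round below follows
# the usual pattern on 32-bit words (C-style sketch, on state quartet
# y0..y3):
#   y1 ^= rol32(y0 + y3, 7);  y2 ^= rol32(y1 + y0, 9);
#   y3 ^= rol32(y2 + y1, 13); y0 ^= rol32(y3 + y2, 18);
# Thirteen state words stay in registers; x5, x10 and x15 are spilled to
# x5_stack/x10_stack/x15_stack and swapped through %r15 and %rbp as needed.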
# mainloop:
._mainloop:
# i_backup = i
movq %r15,184(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x12 + x0
lea (%r14,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x4 ^= a
xor %rbp,%r9
# b = x1 + x5
lea (%rdi,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x9 ^= b
xor %rbp,%r10
# a = x0 + x4
lea (%rdx,%r9),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x8 ^= a
xor %rbp,%r11
# b = x5 + x9
lea (%r15,%r10),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x13 ^= b
xor %rbp,%r13
# a = x4 + x8
lea (%r9,%r11),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x12 ^= a
xor %rbp,%r14
# b = x9 + x13
lea (%r10,%r13),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x1 ^= b
xor %rbp,%rdi
# a = x8 + x12
lea (%r11,%r14),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x13 + x1
lea (%r13,%rdi),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x6 + x10
lea (%rax,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x14 ^= c
xor %r15,%rbx
# c = x10 + x14
lea (%rbp,%rbx),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x2 ^= c
xor %r15,%rcx
# c = x14 + x2
lea (%rbx,%rcx),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x6 ^= c
xor %r15,%rax
# c = x2 + x6
lea (%rcx,%rax),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x11 + x15
lea (%r12,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x3 ^= d
xor %rbp,%rsi
# d = x15 + x3
lea (%r15,%rsi),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x7 ^= d
xor %rbp,%r8
# d = x3 + x7
lea (%rsi,%r8),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x11 ^= d
xor %rbp,%r12
# d = x7 + x11
lea (%r8,%r12),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# x5 = x5_stack
movq 160(%rsp),%r15
# a = x3 + x0
lea (%rsi,%rdx),%rbp
# (uint32) a <<<= 7
rol $7,%ebp
# x1 ^= a
xor %rbp,%rdi
# b = x4 + x5
lea (%r9,%r15),%rbp
# (uint32) b <<<= 7
rol $7,%ebp
# x6 ^= b
xor %rbp,%rax
# a = x0 + x1
lea (%rdx,%rdi),%rbp
# (uint32) a <<<= 9
rol $9,%ebp
# x2 ^= a
xor %rbp,%rcx
# b = x5 + x6
lea (%r15,%rax),%rbp
# (uint32) b <<<= 9
rol $9,%ebp
# x7 ^= b
xor %rbp,%r8
# a = x1 + x2
lea (%rdi,%rcx),%rbp
# (uint32) a <<<= 13
rol $13,%ebp
# x3 ^= a
xor %rbp,%rsi
# b = x6 + x7
lea (%rax,%r8),%rbp
# (uint32) b <<<= 13
rol $13,%ebp
# x4 ^= b
xor %rbp,%r9
# a = x2 + x3
lea (%rcx,%rsi),%rbp
# (uint32) a <<<= 18
rol $18,%ebp
# x0 ^= a
xor %rbp,%rdx
# b = x7 + x4
lea (%r8,%r9),%rbp
# (uint32) b <<<= 18
rol $18,%ebp
# x5 ^= b
xor %rbp,%r15
# x10 = x10_stack
movq 168(%rsp),%rbp
# x5_stack = x5
movq %r15,160(%rsp)
# c = x9 + x10
lea (%r10,%rbp),%r15
# (uint32) c <<<= 7
rol $7,%r15d
# x11 ^= c
xor %r15,%r12
# c = x10 + x11
lea (%rbp,%r12),%r15
# (uint32) c <<<= 9
rol $9,%r15d
# x8 ^= c
xor %r15,%r11
# c = x11 + x8
lea (%r12,%r11),%r15
# (uint32) c <<<= 13
rol $13,%r15d
# x9 ^= c
xor %r15,%r10
# c = x8 + x9
lea (%r11,%r10),%r15
# (uint32) c <<<= 18
rol $18,%r15d
# x10 ^= c
xor %r15,%rbp
# x15 = x15_stack
movq 176(%rsp),%r15
# x10_stack = x10
movq %rbp,168(%rsp)
# d = x14 + x15
lea (%rbx,%r15),%rbp
# (uint32) d <<<= 7
rol $7,%ebp
# x12 ^= d
xor %rbp,%r14
# d = x15 + x12
lea (%r15,%r14),%rbp
# (uint32) d <<<= 9
rol $9,%ebp
# x13 ^= d
xor %rbp,%r13
# d = x12 + x13
lea (%r14,%r13),%rbp
# (uint32) d <<<= 13
rol $13,%ebp
# x14 ^= d
xor %rbp,%rbx
# d = x13 + x14
lea (%r13,%rbx),%rbp
# (uint32) d <<<= 18
rol $18,%ebp
# x15 ^= d
xor %rbp,%r15
# x15_stack = x15
movq %r15,176(%rsp)
# i = i_backup
movq 184(%rsp),%r15
# unsigned>? i -= 4
sub $4,%r15
# comment:fp stack unchanged by jump
# goto mainloop if unsigned>
ja ._mainloop
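# After the 20 rounds, the original state j0..j14 is added back in
# (the Salsa20 feedforward). Each 64-bit register packs two adjacent 32-bit
# state words: the low word is added with a 32-bit addl, while the shl/shr
# pairs below compute the high word's 32-bit sum in isolation and shift it
# back into the upper half before recombining, so each register again holds
# a packed pair ready for the 64-bit xor with the message.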
# (uint32) x2 += j2
addl 64(%rsp),%ecx
# x3 <<= 32
shl $32,%rsi
# x3 += j2
addq 64(%rsp),%rsi
# (uint64) x3 >>= 32
shr $32,%rsi
# x3 <<= 32
shl $32,%rsi
# x2 += x3
add %rsi,%rcx
# (uint32) x6 += j6
addl 80(%rsp),%eax
# x7 <<= 32
shl $32,%r8
# x7 += j6
addq 80(%rsp),%r8
# (uint64) x7 >>= 32
shr $32,%r8
# x7 <<= 32
shl $32,%r8
# x6 += x7
add %r8,%rax
# (uint32) x8 += j8
addl 88(%rsp),%r11d
# x9 <<= 32
shl $32,%r10
# x9 += j8
addq 88(%rsp),%r10
# (uint64) x9 >>= 32
shr $32,%r10
# x9 <<= 32
shl $32,%r10
# x8 += x9
add %r10,%r11
# (uint32) x12 += j12
addl 104(%rsp),%r14d
# x13 <<= 32
shl $32,%r13
# x13 += j12
addq 104(%rsp),%r13
# (uint64) x13 >>= 32
shr $32,%r13
# x13 <<= 32
shl $32,%r13
# x12 += x13
add %r13,%r14
# (uint32) x0 += j0
addl 56(%rsp),%edx
# x1 <<= 32
shl $32,%rdi
# x1 += j0
addq 56(%rsp),%rdi
# (uint64) x1 >>= 32
shr $32,%rdi
# x1 <<= 32
shl $32,%rdi
# x0 += x1
add %rdi,%rdx
# x5 = x5_stack
movq 160(%rsp),%rdi
# (uint32) x4 += j4
addl 72(%rsp),%r9d
# x5 <<= 32
shl $32,%rdi
# x5 += j4
addq 72(%rsp),%rdi
# (uint64) x5 >>= 32
shr $32,%rdi
# x5 <<= 32
shl $32,%rdi
# x4 += x5
add %rdi,%r9
# x10 = x10_stack
movq 168(%rsp),%r8
# (uint32) x10 += j10
addl 96(%rsp),%r8d
# x11 <<= 32
shl $32,%r12
# x11 += j10
addq 96(%rsp),%r12
# (uint64) x11 >>= 32
shr $32,%r12
# x11 <<= 32
shl $32,%r12
# x10 += x11
add %r12,%r8
# x15 = x15_stack
movq 176(%rsp),%rdi
# (uint32) x14 += j14
addl 112(%rsp),%ebx
# x15 <<= 32
shl $32,%rdi
# x15 += j14
addq 112(%rsp),%rdi
# (uint64) x15 >>= 32
shr $32,%rdi
# x15 <<= 32
shl $32,%rdi
# x14 += x15
add %rdi,%rbx
# out = out_backup
movq 136(%rsp),%rdi
# m = m_backup
movq 144(%rsp),%rsi
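# The 64-byte keystream block is now packed in eight 64-bit registers;
# xor it with the message 8 bytes at a time and store the result to out.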
# x0 ^= *(uint64 *) (m + 0)
xorq 0(%rsi),%rdx
# *(uint64 *) (out + 0) = x0
movq %rdx,0(%rdi)
# x2 ^= *(uint64 *) (m + 8)
xorq 8(%rsi),%rcx
# *(uint64 *) (out + 8) = x2
movq %rcx,8(%rdi)
# x4 ^= *(uint64 *) (m + 16)
xorq 16(%rsi),%r9
# *(uint64 *) (out + 16) = x4
movq %r9,16(%rdi)
# x6 ^= *(uint64 *) (m + 24)
xorq 24(%rsi),%rax
# *(uint64 *) (out + 24) = x6
movq %rax,24(%rdi)
# x8 ^= *(uint64 *) (m + 32)
xorq 32(%rsi),%r11
# *(uint64 *) (out + 32) = x8
movq %r11,32(%rdi)
# x10 ^= *(uint64 *) (m + 40)
xorq 40(%rsi),%r8
# *(uint64 *) (out + 40) = x10
movq %r8,40(%rdi)
# x12 ^= *(uint64 *) (m + 48)
xorq 48(%rsi),%r14
# *(uint64 *) (out + 48) = x12
movq %r14,48(%rdi)
# x14 ^= *(uint64 *) (m + 56)
xorq 56(%rsi),%rbx
# *(uint64 *) (out + 56) = x14
movq %rbx,56(%rdi)
# bytes = bytes_backup
movq 152(%rsp),%rdx
# in8 = j8
movq 88(%rsp),%rcx
# in8 += 1
add $1,%rcx
# j8 = in8
movq %rcx,88(%rsp)
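# State words 8 and 9 form the 64-bit block counter; j8 holds both words in
# one register, so a single 64-bit increment advances the counter for the
# next block.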
# unsigned>? unsigned<? bytes - 64
cmp $64,%rdx
# comment:fp stack unchanged by jump
# goto bytesatleast65 if unsigned>
ja ._bytesatleast65
# comment:fp stack unchanged by jump
# goto bytesatleast64 if !unsigned<
jae ._bytesatleast64
# m = out
mov %rdi,%rsi
# out = ctarget
movq 128(%rsp),%rdi
# i = bytes
mov %rdx,%rcx
# while (i) { *out++ = *m++; --i }
rep movsb
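# Final partial block: only the remaining `bytes` bytes are copied from the
# stack tmp buffer back to the real destination saved in ctarget.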
# comment:fp stack unchanged by fallthrough
# bytesatleast64:
._bytesatleast64:
# x = x_backup
movq 120(%rsp),%rdi
# in8 = j8
movq 88(%rsp),%rsi
# *(uint64 *) (x + 32) = in8
movq %rsi,32(%rdi)
# r11 = r11_stack
movq 0(%rsp),%r11
# r12 = r12_stack
movq 8(%rsp),%r12
# r13 = r13_stack
movq 16(%rsp),%r13
# r14 = r14_stack
movq 24(%rsp),%r14
# r15 = r15_stack
movq 32(%rsp),%r15
# rbx = rbx_stack
movq 40(%rsp),%rbx
# rbp = rbp_stack
movq 48(%rsp),%rbp
# comment:fp stack unchanged by fallthrough
# done:
._done:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# bytesatleast65:
._bytesatleast65:
# bytes -= 64
sub $64,%rdx
# out += 64
add $64,%rdi
# m += 64
add $64,%rsi
# comment:fp stack unchanged by jump
# goto bytesatleast1
jmp ._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
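# ECRYPT_keysetup copies the key into state words 1-4 and 11-14 (byte
# offsets 4, 12, 44, 52) and writes the four diagonal constants to words
# 0, 5, 10 and 15: "expand 32-byte k" for 256-bit keys, "expand 16-byte k"
# for 128-bit keys. The decimal literals below are those strings read as
# little-endian 32-bit words.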
# k = arg2
mov %rsi,%rsi
# kbits = arg3
mov %rdx,%rdx
# x = arg1
mov %rdi,%rdi
# in0 = *(uint64 *) (k + 0)
movq 0(%rsi),%r8
# in2 = *(uint64 *) (k + 8)
movq 8(%rsi),%r9
# *(uint64 *) (x + 4) = in0
movq %r8,4(%rdi)
# *(uint64 *) (x + 12) = in2
movq %r9,12(%rdi)
# unsigned<? kbits - 256
cmp $256,%rdx
# comment:fp stack unchanged by jump
# goto kbits128 if unsigned<
jb ._kbits128
# kbits256:
._kbits256:
# in10 = *(uint64 *) (k + 16)
movq 16(%rsi),%rdx
# in12 = *(uint64 *) (k + 24)
movq 24(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 857760878
mov $857760878,%rdx
# in10 = 2036477234
mov $2036477234,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# comment:fp stack unchanged by jump
# goto keysetupdone
jmp ._keysetupdone
# kbits128:
._kbits128:
# in10 = *(uint64 *) (k + 0)
movq 0(%rsi),%rdx
# in12 = *(uint64 *) (k + 8)
movq 8(%rsi),%rsi
# *(uint64 *) (x + 44) = in10
movq %rdx,44(%rdi)
# *(uint64 *) (x + 52) = in12
movq %rsi,52(%rdi)
# in0 = 1634760805
mov $1634760805,%rsi
# in4 = 824206446
mov $824206446,%rdx
# in10 = 2036477238
mov $2036477238,%rcx
# in14 = 1797285236
mov $1797285236,%r8
# *(uint32 *) (x + 0) = in0
movl %esi,0(%rdi)
# *(uint32 *) (x + 20) = in4
movl %edx,20(%rdi)
# *(uint32 *) (x + 40) = in10
movl %ecx,40(%rdi)
# *(uint32 *) (x + 60) = in14
movl %r8d,60(%rdi)
# keysetupdone:
._keysetupdone:
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
mov %rsp,%r11
and $31,%r11
add $256,%r11
sub %r11,%rsp
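# ECRYPT_ivsetup stores the 8-byte nonce into state words 6-7 (byte offset
# 24) and zeroes the 64-bit block counter in words 8-9 (byte offset 32).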
# iv = arg2
mov %rsi,%rsi
# x = arg1
mov %rdi,%rdi
# in6 = *(uint64 *) (iv + 0)
movq 0(%rsi),%rsi
# in8 = 0
mov $0,%r8
# *(uint64 *) (x + 24) = in6
movq %rsi,24(%rdi)
# *(uint64 *) (x + 32) = in8
movq %r8,32(%rdi)
# leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret