aes-i586-asm_32.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. // -------------------------------------------------------------------------
  2. // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
  3. // All rights reserved.
  4. //
  5. // LICENSE TERMS
  6. //
  7. // The free distribution and use of this software in both source and binary
  8. // form is allowed (with or without changes) provided that:
  9. //
  10. // 1. distributions of this source code include the above copyright
  11. // notice, this list of conditions and the following disclaimer//
  12. //
  13. // 2. distributions in binary form include the above copyright
  14. // notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other associated materials//
  16. //
  17. // 3. the copyright holder's name is not used to endorse products
  18. // built using this software without specific written permission.
  19. //
  20. //
  21. // ALTERNATIVELY, provided that this notice is retained in full, this product
  22. // may be distributed under the terms of the GNU General Public License (GPL),
  23. // in which case the provisions of the GPL apply INSTEAD OF those given above.
  24. //
  25. // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
  26. // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  27. // DISCLAIMER
  28. //
  29. // This software is provided 'as is' with no explicit or implied warranties
  30. // in respect of its properties including, but not limited to, correctness
  31. // and fitness for purpose.
  32. // -------------------------------------------------------------------------
  33. // Issue Date: 29/07/2002
  34. .file "aes-i586-asm.S"
  35. .text
  36. #include <asm/asm-offsets.h>
  37. #define tlen 1024 // length in bytes of each of 4 'xor' arrays (256 32-bit words)
  38. /* offsets from %esp to the stack parameters with one register pushed onto stack (add 4 for each further push) */
  39. #define ctx 8 /* first argument: context holding the round keys and the key size */
  40. #define out_blk 12 /* second argument: pointer to the destination block */
  41. #define in_blk 16 /* third argument: pointer to the source block */
  42. /* byte offsets in crypto_aes_ctx structure, hard-coded here -- NOTE(review): they must match the C layout of the structure, which is not visible in this file */
  43. #define klen (480) /* key size field; read by aes_enc_blk and aes_dec_blk and compared against 24 */
  44. #define ekey (0) /* start of the round keys used by aes_enc_blk */
  45. #define dkey (240) /* start of the round keys used by aes_dec_blk */
  46. // register mapping for encrypt and decrypt subroutines (only r0..r3 have the byte sub-registers named below)
  47. #define r0 eax
  48. #define r1 ebx
  49. #define r2 ecx
  50. #define r3 edx
  51. #define r4 esi
  52. #define r5 edi
  53. #define eaxl al /* byte-register names; these are the tokens produced by the l() and h() macros below */
  54. #define eaxh ah
  55. #define ebxl bl
  56. #define ebxh bh
  57. #define ecxl cl
  58. #define ecxh ch
  59. #define edxl dl
  60. #define edxh dh
  61. #define _h(reg) reg##h /* pasting helper; call h() instead so that reg is macro-expanded first */
  62. #define h(reg) _h(reg) /* register holding bits 8..15 of reg, e.g. h(r0) -> eaxh -> ah */
  63. #define _l(reg) reg##l /* pasting helper; call l() instead so that reg is macro-expanded first */
  64. #define l(reg) _l(reg) /* register holding bits 0..7 of reg, e.g. l(r0) -> eaxl -> al */
  65. // do_col: takes a 32-bit word representing a column (in idx) and uses
  66. // each of its four bytes to index into four consecutive tables of 256 32-bit
  67. // words (table, table+tlen, table+2*tlen, table+3*tlen) to obtain values
  68. // that are then xored into the output registers a1, a2, a3 and a4.
  69. // Parameters:
  70. // table base address of the first of the four tables
  71. // a1 register xored with the table entry selected by byte 0 (lowest byte) of idx
  72. // a2 register xored with the table+tlen entry selected by byte 1 of idx
  73. // a3 register xored with the table+2*tlen entry selected by byte 2 of idx
  74. // a4 register xored with the table+3*tlen entry selected by byte 3 (highest byte) of idx
  75. // idx input register for the round (destroyed); must be one of r0..r3 because l() and h() need its byte sub-registers
  76. // tmp scratch register for the round (destroyed)
  77. // The flags are clobbered (shr, xor). This macro takes no key schedule argument; only do_fcol and do_icol do.
  78. #define do_col(table, a1,a2,a3,a4, idx, tmp) \
  79. movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
  80. xor table(,%tmp,4),%a1; \
  81. movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
  82. shr $16,%idx; /* move bytes 2 and 3 down into the low half of idx */ \
  83. xor table+tlen(,%tmp,4),%a2; \
  84. movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
  85. movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
  86. xor table+2*tlen(,%tmp,4),%a3; \
  87. xor table+3*tlen(,%idx,4),%a4; /* idx itself is the index here, so no fifth register is needed */
  88. // do_fcol: initialise output registers from the key schedule, then do the table lookups of do_col for idx. a1,a2,a3,a4 are loaded from byte offsets 0,12,8,4 of sched and xored with the entries selected by bytes 0,1,2,3 of idx.
  89. // NB1: original value of a3 is in idx on exit (it is copied there just before a3 is reloaded from sched)
  90. // NB2: original values of a1,a2,a4 aren't used; tmp and the flags are clobbered. The numeric offset is pasted directly in front of sched (e.g. "12 -64(%ebp)"); every caller in this file passes a displacement that is empty or explicitly signed.
  91. #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
  92. mov 0 sched,%a1; /* a1 = round-key word at offset 0 */ \
  93. movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
  94. mov 12 sched,%a2; /* a2 = round-key word at offset 12 */ \
  95. xor table(,%tmp,4),%a1; \
  96. mov 4 sched,%a4; /* a4 = round-key word at offset 4 */ \
  97. movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
  98. shr $16,%idx; /* move bytes 2 and 3 down into the low half of idx */ \
  99. xor table+tlen(,%tmp,4),%a2; \
  100. movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
  101. movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
  102. xor table+3*tlen(,%idx,4),%a4; \
  103. mov %a3,%idx; /* hand the old a3 back to the caller in idx */ \
  104. mov 8 sched,%a3; /* a3 = round-key word at offset 8 */ \
  105. xor table+2*tlen(,%tmp,4),%a3;
  106. // do_icol: initialise output registers from the key schedule, then do the table lookups of do_col for idx. Same as do_fcol except for the key words: a1,a2,a3,a4 are loaded from byte offsets 0,4,8,12 of sched.
  107. // NB1: original value of a3 is in idx on exit (it is copied there just before a3 is reloaded from sched)
  108. // NB2: original values of a1,a2,a4 aren't used; tmp and the flags are clobbered. The numeric offset is pasted directly in front of sched (e.g. "12 -64(%ebp)"); every caller in this file passes a displacement that is empty or explicitly signed.
  109. #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
  110. mov 0 sched,%a1; /* a1 = round-key word at offset 0 */ \
  111. movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
  112. mov 4 sched,%a2; /* a2 = round-key word at offset 4 */ \
  113. xor table(,%tmp,4),%a1; \
  114. mov 12 sched,%a4; /* a4 = round-key word at offset 12 */ \
  115. movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
  116. shr $16,%idx; /* move bytes 2 and 3 down into the low half of idx */ \
  117. xor table+tlen(,%tmp,4),%a2; \
  118. movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
  119. movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
  120. xor table+3*tlen(,%idx,4),%a4; \
  121. mov %a3,%idx; /* hand the old a3 back to the caller in idx */ \
  122. mov 8 sched,%a3; /* a3 = round-key word at offset 8 */ \
  123. xor table+2*tlen(,%tmp,4),%a3;
  124. // original Gladman had conditional saves to MMX regs. Here save(n, reg) stores reg in the 32-bit stack slot at 4*n(%esp) and restore(reg, n) loads it back; note that the two macros take their arguments in opposite order. The caller must have reserved the slots.
  125. #define save(a1, a2) \
  126. mov %a2,4*a1(%esp) /* a1 = slot number, a2 = register to store */
  127. #define restore(a1, a2) \
  128. mov 4*a2(%esp),%a1 /* a1 = register to load, a2 = slot number */
  129. // These macros perform a forward encryption cycle (one round per macro). They are entered with
  130. // the previous round column values in four registers and
  131. // exit with the new column values in the registers listed below, using stack slots 0 and 1
  132. // (see save/restore) and r3 for temporary storage. arg is the round-key operand handed to do_fcol, table the base of the four lookup tables. fwd_rnd1 exits in the registers fwd_rnd2 expects and vice versa.
  133. // round column values (words 0,1,2,3 of the state, in this order)
  134. // on entry: r0,r1,r4,r5
  135. // on exit: r2,r1,r4,r5
  136. #define fwd_rnd1(arg, table) \
  137. save (0,r1); /* r1 and r5 are overwritten by do_fcol but still needed as inputs */ \
  138. save (1,r5); \
  139. \
  140. /* compute new column values */ \
  141. do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0; afterwards r0 holds the old r4 */ \
  142. do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 (old value, now in r0) */ \
  143. restore(r0,0); \
  144. do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 (old value, reloaded from slot 0) */ \
  145. restore(r0,1); \
  146. do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 (old value, reloaded from slot 1) */
  147. // fwd_rnd2: one forward round, identical to fwd_rnd1 with the roles of r0 and r2 exchanged. arg is the round-key operand handed to do_fcol, table the base of the four lookup tables; stack slots 0 and 1 and r3 are scratch. Round column values (words 0,1,2,3 of the state, in this order):
  148. // on entry: r2,r1,r4,r5
  149. // on exit: r0,r1,r4,r5
  150. #define fwd_rnd2(arg, table) \
  151. save (0,r1); /* r1 and r5 are overwritten by do_fcol but still needed as inputs */ \
  152. save (1,r5); \
  153. \
  154. /* compute new column values */ \
  155. do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2; afterwards r2 holds the old r4 */ \
  156. do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 (old value, now in r2) */ \
  157. restore(r2,0); \
  158. do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 (old value, reloaded from slot 0) */ \
  159. restore(r2,1); \
  160. do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 (old value, reloaded from slot 1) */
  161. // These macros perform an inverse encryption cycle (one round per macro). They are entered with
  162. // the previous round column values in four registers and
  163. // exit with the new column values in the registers listed below, using stack slots 0 and 1
  164. // (see save/restore) and r3 for temporary storage. arg is the round-key operand handed to do_icol, table the base of the four lookup tables. inv_rnd1 exits in the registers inv_rnd2 expects and vice versa.
  165. // round column values (words 0,1,2,3 of the state, in this order)
  166. // on entry: r0,r1,r4,r5
  167. // on exit: r2,r1,r4,r5
  168. #define inv_rnd1(arg, table) \
  169. save (0,r1); /* r1 and r5 are overwritten by do_icol but still needed as inputs */ \
  170. save (1,r5); \
  171. \
  172. /* compute new column values */ \
  173. do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0; afterwards r0 holds the old r4 */ \
  174. do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 (old value, now in r0) */ \
  175. restore(r0,0); \
  176. do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 (old value, reloaded from slot 0) */ \
  177. restore(r0,1); \
  178. do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 (old value, reloaded from slot 1) */
  179. // inv_rnd2: one inverse round, identical to inv_rnd1 with the roles of r0 and r2 exchanged. arg is the round-key operand handed to do_icol, table the base of the four lookup tables; stack slots 0 and 1 and r3 are scratch. Round column values (words 0,1,2,3 of the state, in this order):
  180. // on entry: r2,r1,r4,r5
  181. // on exit: r0,r1,r4,r5
  182. #define inv_rnd2(arg, table) \
  183. save (0,r1); /* r1 and r5 are overwritten by do_icol but still needed as inputs */ \
  184. save (1,r5); \
  185. \
  186. /* compute new column values */ \
  187. do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2; afterwards r2 holds the old r4 */ \
  188. do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 (old value, now in r2) */ \
  189. restore(r2,0); \
  190. do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 (old value, reloaded from slot 0) */ \
  191. restore(r2,1); \
  192. do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 (old value, reloaded from slot 1) */
  193. // AES (Rijndael) Encryption Subroutine: reads four 32-bit words from in_blk, applies the round keys starting at ctx+ekey (10, 12 or 14 rounds chosen from the key size at ctx+klen) and stores four 32-bit words to out_blk. Arguments are read from the stack. ebp, ebx, esi and edi are saved and restored; eax, ecx, edx and the flags are clobbered.
  194. /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
  195. .global aes_enc_blk
  196. .extern crypto_ft_tab
  197. .extern crypto_fl_tab
  198. .align 4
  199. aes_enc_blk:
  200. push %ebp
  201. mov ctx(%esp),%ebp // ebp = ctx; one register is pushed, as the offset macros assume
  202. // CAUTION: the order and the values used in these assigns
  203. // rely on the register mappings (NOTE(review): no branch to local label 1 below is visible in this file)
  204. 1: push %ebx
  205. mov in_blk+4(%esp),%r2 // r2 = in_blk; +4 because two registers are pushed now
  206. push %esi
  207. mov klen(%ebp),%r3 // key size
  208. push %edi
  209. #if ekey != 0
  210. lea ekey(%ebp),%ebp // key pointer
  211. #endif
  212. // input four columns and xor in first round key
  213. mov (%r2),%r0
  214. mov 4(%r2),%r1
  215. mov 8(%r2),%r4
  216. mov 12(%r2),%r5
  217. xor (%ebp),%r0
  218. xor 4(%ebp),%r1
  219. xor 8(%ebp),%r4
  220. xor 12(%ebp),%r5
  221. sub $8,%esp // space for register saves on stack (slots 0 and 1 of save/restore)
  222. add $16,%ebp // increment to next round key
  223. cmp $24,%r3 // the flags set here drive both the jb and the je below
  224. jb 4f // 10 rounds for 128-bit key
  225. lea 32(%ebp),%ebp // skip two round keys; lea is used because it leaves the flags from cmp intact
  226. je 3f // 12 rounds for 192-bit key
  227. lea 32(%ebp),%ebp // skip two more round keys so that the offsets below line up for 14 rounds
  228. 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
  229. fwd_rnd2( -48(%ebp), crypto_ft_tab)
  230. 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
  231. fwd_rnd2( -16(%ebp), crypto_ft_tab)
  232. 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
  233. fwd_rnd2( +16(%ebp), crypto_ft_tab)
  234. fwd_rnd1( +32(%ebp), crypto_ft_tab)
  235. fwd_rnd2( +48(%ebp), crypto_ft_tab)
  236. fwd_rnd1( +64(%ebp), crypto_ft_tab)
  237. fwd_rnd2( +80(%ebp), crypto_ft_tab)
  238. fwd_rnd1( +96(%ebp), crypto_ft_tab)
  239. fwd_rnd2(+112(%ebp), crypto_ft_tab)
  240. fwd_rnd1(+128(%ebp), crypto_ft_tab)
  241. fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
  242. // move final values to the output array (they are in r0,r1,r4,r5 because the last macro is fwd_rnd2). CAUTION: the
  243. // order of these assigns rely on the register mappings
  244. add $8,%esp // release the two save/restore slots
  245. mov out_blk+12(%esp),%ebp // ebp = out_blk; +12 because four registers are pushed now
  246. mov %r5,12(%ebp) // r5 = edi must be stored before edi is popped
  247. pop %edi
  248. mov %r4,8(%ebp) // r4 = esi must be stored before esi is popped
  249. pop %esi
  250. mov %r1,4(%ebp) // r1 = ebx must be stored before ebx is popped
  251. pop %ebx
  252. mov %r0,(%ebp) // last use of ebp as the output pointer
  253. pop %ebp
  254. ret
  255. // AES (Rijndael) Decryption Subroutine: reads four 32-bit words from in_blk, applies the round keys starting at ctx+dkey (10, 12 or 14 rounds chosen from the key size at ctx+klen) and stores four 32-bit words to out_blk. Arguments are read from the stack. ebp, ebx, esi and edi are saved and restored; eax, ecx, edx and the flags are clobbered.
  256. /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
  257. .global aes_dec_blk
  258. .extern crypto_it_tab
  259. .extern crypto_il_tab
  260. .align 4
  261. aes_dec_blk:
  262. push %ebp
  263. mov ctx(%esp),%ebp // ebp = ctx; one register is pushed, as the offset macros assume
  264. // CAUTION: the order and the values used in these assigns
  265. // rely on the register mappings (NOTE(review): no branch to local label 1 below is visible in this file)
  266. 1: push %ebx
  267. mov in_blk+4(%esp),%r2 // r2 = in_blk; +4 because two registers are pushed now
  268. push %esi
  269. mov klen(%ebp),%r3 // key size
  270. push %edi
  271. #if dkey != 0
  272. lea dkey(%ebp),%ebp // key pointer
  273. #endif
  274. // input four columns and xor in first round key
  275. mov (%r2),%r0
  276. mov 4(%r2),%r1
  277. mov 8(%r2),%r4
  278. mov 12(%r2),%r5
  279. xor (%ebp),%r0
  280. xor 4(%ebp),%r1
  281. xor 8(%ebp),%r4
  282. xor 12(%ebp),%r5
  283. sub $8,%esp // space for register saves on stack (slots 0 and 1 of save/restore)
  284. add $16,%ebp // increment to next round key
  285. cmp $24,%r3 // the flags set here drive both the jb and the je below
  286. jb 4f // 10 rounds for 128-bit key
  287. lea 32(%ebp),%ebp // skip two round keys; lea is used because it leaves the flags from cmp intact
  288. je 3f // 12 rounds for 192-bit key
  289. lea 32(%ebp),%ebp // skip two more round keys so that the offsets below line up for 14 rounds
  290. 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
  291. inv_rnd2( -48(%ebp), crypto_it_tab)
  292. 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
  293. inv_rnd2( -16(%ebp), crypto_it_tab)
  294. 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
  295. inv_rnd2( +16(%ebp), crypto_it_tab)
  296. inv_rnd1( +32(%ebp), crypto_it_tab)
  297. inv_rnd2( +48(%ebp), crypto_it_tab)
  298. inv_rnd1( +64(%ebp), crypto_it_tab)
  299. inv_rnd2( +80(%ebp), crypto_it_tab)
  300. inv_rnd1( +96(%ebp), crypto_it_tab)
  301. inv_rnd2(+112(%ebp), crypto_it_tab)
  302. inv_rnd1(+128(%ebp), crypto_it_tab)
  303. inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
  304. // move final values to the output array (they are in r0,r1,r4,r5 because the last macro is inv_rnd2). CAUTION: the
  305. // order of these assigns rely on the register mappings
  306. add $8,%esp // release the two save/restore slots
  307. mov out_blk+12(%esp),%ebp // ebp = out_blk; +12 because four registers are pushed now
  308. mov %r5,12(%ebp) // r5 = edi must be stored before edi is popped
  309. pop %edi
  310. mov %r4,8(%ebp) // r4 = esi must be stored before esi is popped
  311. pop %esi
  312. mov %r1,4(%ebp) // r1 = ebx must be stored before ebx is popped
  313. pop %ebx
  314. mov %r0,(%ebp) // last use of ebp as the output pointer
  315. pop %ebp
  316. ret