aes-i586-asm_32.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. // -------------------------------------------------------------------------
  2. // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
  3. // All rights reserved.
  4. //
  5. // LICENSE TERMS
  6. //
  7. // The free distribution and use of this software in both source and binary
  8. // form is allowed (with or without changes) provided that:
  9. //
  10. // 1. distributions of this source code include the above copyright
  11. // notice, this list of conditions and the following disclaimer//
  12. //
  13. // 2. distributions in binary form include the above copyright
  14. // notice, this list of conditions and the following disclaimer
  15. // in the documentation and/or other associated materials//
  16. //
  17. // 3. the copyright holder's name is not used to endorse products
  18. // built using this software without specific written permission.
  19. //
  20. //
  21. // ALTERNATIVELY, provided that this notice is retained in full, this product
  22. // may be distributed under the terms of the GNU General Public License (GPL),
  23. // in which case the provisions of the GPL apply INSTEAD OF those given above.
  24. //
  25. // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
  26. // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
  27. // DISCLAIMER
  28. //
  29. // This software is provided 'as is' with no explicit or implied warranties
  30. // in respect of its properties including, but not limited to, correctness
  31. // and fitness for purpose.
  32. // -------------------------------------------------------------------------
  33. // Issue Date: 29/07/2002
  // Assembler preamble: source file name, code section, kernel headers and
  // the layout constants shared by both routines in this file.
  34. .file "aes-i586-asm.S"
  35. .text
  36. #include <linux/linkage.h>
  37. #include <asm/asm-offsets.h>
  // tlen is the byte distance between consecutive lookup tables; the column
  // macros address them as table, table+tlen, table+2*tlen and table+3*tlen.
  38. #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
  39. /* offsets to parameters with one register pushed onto stack */
  // These are %esp-relative; code that has pushed further registers adds
  // 4 bytes per extra push (see in_blk+4 and out_blk+12 in the routines).
  40. #define ctx 8
  41. #define out_blk 12
  42. #define in_blk 16
  43. /* offsets in crypto_aes_ctx structure */
  // klen: key size field, ekey: round keys used by aes_enc_blk,
  // dkey: round keys used by aes_dec_blk.
  // NOTE(review): hard-coded values - confirm they still match the C
  // definition of struct crypto_aes_ctx.
  44. #define klen (480)
  45. #define ekey (0)
  46. #define dkey (240)
  47. // register mapping for encrypt and decrypt subroutines
  // r0..r3 are the registers for which byte aliases are defined below, so
  // only they may be passed to l() or h(); r4 and r5 (esi/edi) have none.
  48. #define r0 eax
  49. #define r1 ebx
  50. #define r2 ecx
  51. #define r3 edx
  52. #define r4 esi
  53. #define r5 edi
  // Byte sub-register names, spelled so that token pasting can build them:
  // <reg>l is the low byte (bits 0-7), <reg>h the second byte (bits 8-15).
  54. #define eaxl al
  55. #define eaxh ah
  56. #define ebxl bl
  57. #define ebxh bh
  58. #define ecxl cl
  59. #define ecxh ch
  60. #define edxl dl
  61. #define edxh dh
  // h(reg)/l(reg) go through a second macro level so that an argument such
  // as r0 is first expanded to eax before ## pastes the h/l suffix onto it
  // (giving eaxh/eaxl, which the defines above turn into ah/al).
  62. #define _h(reg) reg##h
  63. #define h(reg) _h(reg)
  64. #define _l(reg) reg##l
  65. #define l(reg) _l(reg)
  66. // This macro takes a 32-bit word representing a column and uses
  67. // each of its four bytes to index into four tables of 256 32-bit
  68. // words to obtain values that are then xored into the appropriate
  69. // output registers a1..a4.
  70. // Parameters:
  71. // table table base address
  72. // a1 register xored with the table entry for byte 0 of idx
  73. // a2 register xored with the table+tlen entry for byte 1 of idx
  74. // a3 register xored with the table+2*tlen entry for byte 2 of idx
  75. // a4 register xored with the table+3*tlen entry for byte 3 of idx
  76. // idx input register for the round (destroyed)
  77. // tmp scratch register for the round
  78. // NB: idx must be one of r0..r3 because l(idx)/h(idx) need byte aliases
  79. #define do_col(table, a1,a2,a3,a4, idx, tmp) \
  /* byte 0 of idx selects the entry of the first table */ \
  80. movzx %l(idx),%tmp; \
  81. xor table(,%tmp,4),%a1; \
  /* fetch byte 1 before the shift below discards it */ \
  82. movzx %h(idx),%tmp; \
  83. shr $16,%idx; \
  84. xor table+tlen(,%tmp,4),%a2; \
  /* after the shift the low/second bytes of idx are original bytes 2 and 3 */ \
  85. movzx %l(idx),%tmp; \
  86. movzx %h(idx),%idx; \
  87. xor table+2*tlen(,%tmp,4),%a3; \
  88. xor table+3*tlen(,%idx,4),%a4;
  89. // initialise output registers from the key schedule
  90. // NB1: original value of a3 is in idx on exit
  91. // NB2: original values of a1,a2,a4 aren't used
  // Variant of do_col used by the forward rounds for the first column:
  // instead of xoring into existing values it first loads a1,a2,a3,a4 from
  // the round key words at byte offsets 0,12,8,4 of sched, then xors in the
  // table entries selected by the four bytes of idx exactly as do_col does.
  // sched is an addressing operand such as -64(%ebp); the constant written in
  // front of it is relied upon to combine with sched's own displacement.
  // idx must be one of r0..r3 (byte aliases are needed); tmp is scratch.
  92. #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
  93. mov 0 sched,%a1; \
  94. movzx %l(idx),%tmp; \
  95. mov 12 sched,%a2; \
  96. xor table(,%tmp,4),%a1; \
  97. mov 4 sched,%a4; \
  98. movzx %h(idx),%tmp; \
  99. shr $16,%idx; \
  100. xor table+tlen(,%tmp,4),%a2; \
  101. movzx %l(idx),%tmp; \
  102. movzx %h(idx),%idx; \
  103. xor table+3*tlen(,%idx,4),%a4; \
  /* idx is fully consumed: reuse it to carry the old a3 out (NB1), */ \
  /* which frees a3 to be loaded from the key schedule */ \
  104. mov %a3,%idx; \
  105. mov 8 sched,%a3; \
  106. xor table+2*tlen(,%tmp,4),%a3;
  107. // initialise output registers from the key schedule
  108. // NB1: original value of a3 is in idx on exit
  109. // NB2: original values of a1,a2,a4 aren't used
  // Variant of do_col used by the inverse rounds for the first column.
  // It loads a1,a2,a3,a4 from the round key words at byte offsets 0,4,8,12
  // of sched (do_fcol uses 0,12,8,4), then xors in the table entries selected
  // by the four bytes of idx exactly as do_col does.
  // sched is an addressing operand such as -64(%ebp); the constant written in
  // front of it is relied upon to combine with sched's own displacement.
  // idx must be one of r0..r3 (byte aliases are needed); tmp is scratch.
  110. #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
  111. mov 0 sched,%a1; \
  112. movzx %l(idx),%tmp; \
  113. mov 4 sched,%a2; \
  114. xor table(,%tmp,4),%a1; \
  115. mov 12 sched,%a4; \
  116. movzx %h(idx),%tmp; \
  117. shr $16,%idx; \
  118. xor table+tlen(,%tmp,4),%a2; \
  119. movzx %l(idx),%tmp; \
  120. movzx %h(idx),%idx; \
  121. xor table+3*tlen(,%idx,4),%a4; \
  /* idx is fully consumed: reuse it to carry the old a3 out (NB1), */ \
  /* which frees a3 to be loaded from the key schedule */ \
  122. mov %a3,%idx; \
  123. mov 8 sched,%a3; \
  124. xor table+2*tlen(,%tmp,4),%a3;
  125. // original Gladman had conditional saves to MMX regs.
  // Here the spill area is the stack: slot n is the 32-bit word at
  // 4*n(%esp). The caller must have reserved the slots it uses.
  // save(slot, reg): store register reg into stack slot number slot.
  126. #define save(a1, a2) \
  127. mov %a2,4*a1(%esp)
  // restore(reg, slot): load register reg from stack slot number slot.
  // Note the argument order is the reverse of save().
  128. #define restore(a1, a2) \
  129. mov 4*a2(%esp),%a1
  130. // These macros perform a forward encryption cycle. A fwd_rnd1/fwd_rnd2
  131. // pair is entered with the previous round column values in r0,r1,r4,r5
  132. // and exits with the new values in the same registers, using the stack
  133. // for temporary storage.
  134. // fwd_rnd1 round column values
  135. // on entry: r0,r1,r4,r5
  136. // on exit: r2,r1,r4,r5
  // arg: operand addressing this round's key words (e.g. -64(%ebp))
  // table: base address of the four lookup tables
  // r3 is used as scratch; stack slots 0 and 1 must already be reserved.
  137. #define fwd_rnd1(arg, table) \
  /* r1 and r5 are overwritten with key words by do_fcol before their */ \
  /* old values have been consumed, so spill them first */ \
  138. save (0,r1); \
  139. save (1,r5); \
  140. \
  141. /* compute new column values */ \
  142. do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
  /* do_fcol left the old r4 in r0, so r0 is the index register again */ \
  143. do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
  144. restore(r0,0); \
  145. do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
  146. restore(r0,1); \
  147. do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
  148. // fwd_rnd2 round column values (same steps as fwd_rnd1, r0 and r2 swapped)
  149. // on entry: r2,r1,r4,r5
  150. // on exit: r0,r1,r4,r5
  // arg: operand addressing this round's key words (e.g. -48(%ebp))
  // table: base address of the four lookup tables
  // r3 is used as scratch; stack slots 0 and 1 must already be reserved.
  151. #define fwd_rnd2(arg, table) \
  /* r1 and r5 are overwritten with key words by do_fcol before their */ \
  /* old values have been consumed, so spill them first */ \
  152. save (0,r1); \
  153. save (1,r5); \
  154. \
  155. /* compute new column values */ \
  156. do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
  /* do_fcol left the old r4 in r2, so r2 is the index register again */ \
  157. do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
  158. restore(r2,0); \
  159. do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
  160. restore(r2,1); \
  161. do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
  162. // These macros perform an inverse encryption cycle. An inv_rnd1/inv_rnd2
  163. // pair is entered with the previous round column values in r0,r1,r4,r5
  164. // and exits with the new values in the same registers, using the stack
  165. // for temporary storage
  166. // inv_rnd1 round column values
  167. // on entry: r0,r1,r4,r5
  168. // on exit: r2,r1,r4,r5
  // arg: operand addressing this round's key words (e.g. -64(%ebp))
  // table: base address of the four lookup tables
  // r3 is used as scratch; stack slots 0 and 1 must already be reserved.
  169. #define inv_rnd1(arg, table) \
  /* r1 and r5 are overwritten with key words by do_icol before their */ \
  /* old values have been consumed, so spill them first */ \
  170. save (0,r1); \
  171. save (1,r5); \
  172. \
  173. /* compute new column values */ \
  174. do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
  /* do_icol left the old r4 in r0, so r0 is the index register again */ \
  175. do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
  176. restore(r0,0); \
  177. do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
  178. restore(r0,1); \
  179. do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
  180. // inv_rnd2 round column values (same steps as inv_rnd1, r0 and r2 swapped)
  181. // on entry: r2,r1,r4,r5
  182. // on exit: r0,r1,r4,r5
  // arg: operand addressing this round's key words (e.g. -48(%ebp))
  // table: base address of the four lookup tables
  // r3 is used as scratch; stack slots 0 and 1 must already be reserved.
  183. #define inv_rnd2(arg, table) \
  /* r1 and r5 are overwritten with key words by do_icol before their */ \
  /* old values have been consumed, so spill them first */ \
  184. save (0,r1); \
  185. save (1,r5); \
  186. \
  187. /* compute new column values */ \
  188. do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
  /* do_icol left the old r4 in r2, so r2 is the index register again */ \
  189. do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
  190. restore(r2,0); \
  191. do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
  192. restore(r2,1); \
  193. do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
  194. // AES (Rijndael) Encryption Subroutine
  195. /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
  // Transforms the 16 bytes at in_blk into 16 bytes at out_blk using the
  // round keys found at offset ekey of ctx. All three arguments are read
  // from the stack. %ebp, %ebx, %esi and %edi are pushed on entry and popped
  // before return; %eax, %ecx and %edx are overwritten. 8 bytes of stack are
  // reserved as the two spill slots used by the round macros.
  196. .extern crypto_ft_tab
  197. .extern crypto_fl_tab
  198. ENTRY(aes_enc_blk)
  199. push %ebp
  // %ebp = ctx; it serves as the round-key pointer from here on
  200. mov ctx(%esp),%ebp
  201. // CAUTION: the order and the values used in these assigns
  202. // rely on the register mappings
  203. 1: push %ebx
  // +4 accounts for the %ebx push on top of the one the offsets assume
  204. mov in_blk+4(%esp),%r2
  205. push %esi
  // read the key size while %ebp still points at the start of ctx
  206. mov klen(%ebp),%r3 // key size
  207. push %edi
  208. #if ekey != 0
  209. lea ekey(%ebp),%ebp // key pointer
  210. #endif
  211. // input four columns and xor in first round key
  212. mov (%r2),%r0
  213. mov 4(%r2),%r1
  214. mov 8(%r2),%r4
  215. mov 12(%r2),%r5
  216. xor (%ebp),%r0
  217. xor 4(%ebp),%r1
  218. xor 8(%ebp),%r4
  219. xor 12(%ebp),%r5
  220. sub $8,%esp // space for register saves on stack
  221. add $16,%ebp // increment to next round key
  // dispatch on the key size: below 24 -> label 4, equal to 24 -> label 3,
  // above 24 -> fall through to label 2
  222. cmp $24,%r3
  223. jb 4f // 10 rounds for 128-bit key
  // lea leaves the flags untouched, so the je below still tests the cmp above
  224. lea 32(%ebp),%ebp
  225. je 3f // 12 rounds for 192-bit key
  226. lea 32(%ebp),%ebp
  // %ebp has been advanced by 32 bytes (two round keys) for each extra pair
  // of rounds, so the extra rounds use negative offsets and the last ten
  // rounds use the same offsets 0..+144 for every key size
  227. 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
  228. fwd_rnd2( -48(%ebp), crypto_ft_tab)
  229. 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
  230. fwd_rnd2( -16(%ebp), crypto_ft_tab)
  231. 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
  232. fwd_rnd2( +16(%ebp), crypto_ft_tab)
  233. fwd_rnd1( +32(%ebp), crypto_ft_tab)
  234. fwd_rnd2( +48(%ebp), crypto_ft_tab)
  235. fwd_rnd1( +64(%ebp), crypto_ft_tab)
  236. fwd_rnd2( +80(%ebp), crypto_ft_tab)
  237. fwd_rnd1( +96(%ebp), crypto_ft_tab)
  238. fwd_rnd2(+112(%ebp), crypto_ft_tab)
  239. fwd_rnd1(+128(%ebp), crypto_ft_tab)
  240. fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
  241. // move final values to the output array. CAUTION: the
  242. // order of these assigns rely on the register mappings
  // (r5, r4 and r1 live in %edi, %esi and %ebx, so each must be stored
  // before the pop that reloads that register with the caller's value)
  // release the two spill slots
  243. add $8,%esp
  // +12: %ebx, %esi and %edi are still on the stack above the %ebp push
  244. mov out_blk+12(%esp),%ebp
  245. mov %r5,12(%ebp)
  246. pop %edi
  247. mov %r4,8(%ebp)
  248. pop %esi
  249. mov %r1,4(%ebp)
  250. pop %ebx
  251. mov %r0,(%ebp)
  252. pop %ebp
  253. ret
  254. ENDPROC(aes_enc_blk)
  255. // AES (Rijndael) Decryption Subroutine
  256. /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
  // Transforms the 16 bytes at in_blk into 16 bytes at out_blk using the
  // round keys found at offset dkey of ctx. All three arguments are read
  // from the stack. %ebp, %ebx, %esi and %edi are pushed on entry and popped
  // before return; %eax, %ecx and %edx are overwritten. 8 bytes of stack are
  // reserved as the two spill slots used by the round macros.
  257. .extern crypto_it_tab
  258. .extern crypto_il_tab
  259. ENTRY(aes_dec_blk)
  260. push %ebp
  // %ebp = ctx; it serves as the round-key pointer from here on
  261. mov ctx(%esp),%ebp
  262. // CAUTION: the order and the values used in these assigns
  263. // rely on the register mappings
  264. 1: push %ebx
  // +4 accounts for the %ebx push on top of the one the offsets assume
  265. mov in_blk+4(%esp),%r2
  266. push %esi
  // read the key size before %ebp is moved to the decryption keys
  267. mov klen(%ebp),%r3 // key size
  268. push %edi
  269. #if dkey != 0
  270. lea dkey(%ebp),%ebp // key pointer
  271. #endif
  272. // input four columns and xor in first round key
  273. mov (%r2),%r0
  274. mov 4(%r2),%r1
  275. mov 8(%r2),%r4
  276. mov 12(%r2),%r5
  277. xor (%ebp),%r0
  278. xor 4(%ebp),%r1
  279. xor 8(%ebp),%r4
  280. xor 12(%ebp),%r5
  281. sub $8,%esp // space for register saves on stack
  282. add $16,%ebp // increment to next round key
  // dispatch on the key size: below 24 -> label 4, equal to 24 -> label 3,
  // above 24 -> fall through to label 2
  283. cmp $24,%r3
  284. jb 4f // 10 rounds for 128-bit key
  // lea leaves the flags untouched, so the je below still tests the cmp above
  285. lea 32(%ebp),%ebp
  286. je 3f // 12 rounds for 192-bit key
  287. lea 32(%ebp),%ebp
  // %ebp has been advanced by 32 bytes (two round keys) for each extra pair
  // of rounds, so the extra rounds use negative offsets and the last ten
  // rounds use the same offsets 0..+144 for every key size
  288. 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
  289. inv_rnd2( -48(%ebp), crypto_it_tab)
  290. 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
  291. inv_rnd2( -16(%ebp), crypto_it_tab)
  292. 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
  293. inv_rnd2( +16(%ebp), crypto_it_tab)
  294. inv_rnd1( +32(%ebp), crypto_it_tab)
  295. inv_rnd2( +48(%ebp), crypto_it_tab)
  296. inv_rnd1( +64(%ebp), crypto_it_tab)
  297. inv_rnd2( +80(%ebp), crypto_it_tab)
  298. inv_rnd1( +96(%ebp), crypto_it_tab)
  299. inv_rnd2(+112(%ebp), crypto_it_tab)
  300. inv_rnd1(+128(%ebp), crypto_it_tab)
  301. inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
  302. // move final values to the output array. CAUTION: the
  303. // order of these assigns rely on the register mappings
  // (r5, r4 and r1 live in %edi, %esi and %ebx, so each must be stored
  // before the pop that reloads that register with the caller's value)
  // release the two spill slots
  304. add $8,%esp
  // +12: %ebx, %esi and %edi are still on the stack above the %ebp push
  305. mov out_blk+12(%esp),%ebp
  306. mov %r5,12(%ebp)
  307. pop %edi
  308. mov %r4,8(%ebp)
  309. pop %esi
  310. mov %r1,4(%ebp)
  311. pop %ebx
  312. mov %r0,(%ebp)
  313. pop %ebp
  314. ret
  315. ENDPROC(aes_dec_blk)