123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- // -------------------------------------------------------------------------
- // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
- // All rights reserved.
- //
- // LICENSE TERMS
- //
- // The free distribution and use of this software in both source and binary
- // form is allowed (with or without changes) provided that:
- //
- // 1. distributions of this source code include the above copyright
- // notice, this list of conditions and the following disclaimer//
- //
- // 2. distributions in binary form include the above copyright
- // notice, this list of conditions and the following disclaimer
- // in the documentation and/or other associated materials//
- //
- // 3. the copyright holder's name is not used to endorse products
- // built using this software without specific written permission.
- //
- //
- // ALTERNATIVELY, provided that this notice is retained in full, this product
- // may be distributed under the terms of the GNU General Public License (GPL),
- // in which case the provisions of the GPL apply INSTEAD OF those given above.
- //
- // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
- // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
- // DISCLAIMER
- //
- // This software is provided 'as is' with no explicit or implied warranties
- // in respect of its properties including, but not limited to, correctness
- // and fitness for purpose.
- // -------------------------------------------------------------------------
- // Issue Date: 29/07/2002
- .file "aes-i586-asm.S"
- .text
- #include <asm/asm-offsets.h>
- #define tlen 1024 // byte length of each of the 4 'xor' arrays (256 32-bit words); also the stride from one array to the next
- /* byte offsets from %esp to the C parameters with one register pushed onto stack */
- /* (every further push moves them 4 bytes up, hence the +4 / +12 seen at the use sites) */
- #define ctx 8
- #define out_blk 12
- #define in_blk 16
- /* byte offsets in crypto_aes_ctx structure */
- #define klen (480) /* key length; the code compares it against 24 to pick the round count */
- #define ekey (0) /* start of the round keys used by aes_enc_blk */
- #define dkey (240) /* start of the round keys used by aes_dec_blk */
- // register mapping for encrypt and decrypt subroutines
- // (the round macros below only ever name r0..r5, never the real registers)
- #define r0 eax
- #define r1 ebx
- #define r2 ecx
- #define r3 edx
- #define r4 esi
- #define r5 edi
- // byte sub-register names, spelled <32-bit name>l / <32-bit name>h so that
- // they can be built by token pasting; only eax, ebx, ecx and edx (r0..r3)
- // get such names, so only r0..r3 can be passed to l() and h()
- #define eaxl al
- #define eaxh ah
- #define ebxl bl
- #define ebxh bh
- #define ecxl cl
- #define ecxh ch
- #define edxl dl
- #define edxh dh
- // h(reg) / l(reg) name the bits 15..8 / bits 7..0 byte register of reg.
- // The extra level of macro (h -> _h, l -> _l) makes the preprocessor expand
- // reg (e.g. r0 -> eax) before ## pastes the suffix, giving eaxh -> ah.
- #define _h(reg) reg##h
- #define h(reg) _h(reg)
- #define _l(reg) reg##l
- #define l(reg) _l(reg)
- // This macro takes a 32-bit word representing a column and uses
- // each of its four bytes to index into four tables of 256 32-bit
- // words to obtain values that are then xored into the appropriate
- // output registers a1, a2, a3 and a4.
- // Parameters:
- // table base address of the first table; the other three are addressed
- //       at table+tlen, table+2*tlen and table+3*tlen
- // a1 register xored with the first table's entry for byte 0 of idx
- // a2 register xored with the second table's entry for byte 1 of idx
- // a3 register xored with the third table's entry for byte 2 of idx
- // a4 register xored with the fourth table's entry for byte 3 of idx
- // idx input register for the round (destroyed); passed to l() and h(),
- //     so it has to be one of r0..r3
- // tmp scratch register for the round (destroyed)
- // There is no key schedule parameter here; see do_fcol / do_icol for that.
- #define do_col(table, a1,a2,a3,a4, idx, tmp) \
- movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
- xor table(,%tmp,4),%a1; /* a1 ^= first table[tmp] */ \
- movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
- shr $16,%idx; /* move bytes 2 and 3 down into the low half */ \
- xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[tmp] */ \
- movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
- movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
- xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[tmp] */ \
- xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[idx] */
- // Same table lookups as do_col, but first initialise the output registers
- // from the key schedule:
- //   a1 = word at sched+0, a2 = word at sched+12,
- //   a3 = word at sched+8, a4 = word at sched+4
- // sched is pasted straight after a displacement ("0 sched", "12 sched", ...),
- // so it must be written as a signed-offset operand such as (%ebp),
- // +16(%ebp) or -64(%ebp).
- // NB1: original value of a3 is in idx on exit
- // NB2: original values of a1,a2,a4 aren't used
- #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
- mov 0 sched,%a1; /* a1 = word at sched+0 */ \
- movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
- mov 12 sched,%a2; /* a2 = word at sched+12 */ \
- xor table(,%tmp,4),%a1; /* a1 ^= first table[tmp] */ \
- mov 4 sched,%a4; /* a4 = word at sched+4 */ \
- movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
- shr $16,%idx; /* move bytes 2 and 3 down into the low half */ \
- xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[tmp] */ \
- movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
- movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
- xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[idx] */ \
- mov %a3,%idx; /* idx is used up: park the old a3 in it (NB1) */ \
- mov 8 sched,%a3; /* a3 = word at sched+8 */ \
- xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[tmp] */
- // Same table lookups as do_col, but first initialise the output registers
- // from the key schedule:
- //   a1 = word at sched+0, a2 = word at sched+4,
- //   a3 = word at sched+8, a4 = word at sched+12
- // (do_fcol differs only in loading a2 from sched+12 and a4 from sched+4).
- // sched is pasted straight after a displacement, so it must be written as
- // a signed-offset operand such as (%ebp), +16(%ebp) or -64(%ebp).
- // NB1: original value of a3 is in idx on exit
- // NB2: original values of a1,a2,a4 aren't used
- #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
- mov 0 sched,%a1; /* a1 = word at sched+0 */ \
- movzx %l(idx),%tmp; /* tmp = byte 0 of idx */ \
- mov 4 sched,%a2; /* a2 = word at sched+4 */ \
- xor table(,%tmp,4),%a1; /* a1 ^= first table[tmp] */ \
- mov 12 sched,%a4; /* a4 = word at sched+12 */ \
- movzx %h(idx),%tmp; /* tmp = byte 1 of idx */ \
- shr $16,%idx; /* move bytes 2 and 3 down into the low half */ \
- xor table+tlen(,%tmp,4),%a2; /* a2 ^= second table[tmp] */ \
- movzx %l(idx),%tmp; /* tmp = byte 2 of the original idx */ \
- movzx %h(idx),%idx; /* idx = byte 3 of the original idx */ \
- xor table+3*tlen(,%idx,4),%a4; /* a4 ^= fourth table[idx] */ \
- mov %a3,%idx; /* idx is used up: park the old a3 in it (NB1) */ \
- mov 8 sched,%a3; /* a3 = word at sched+8 */ \
- xor table+2*tlen(,%tmp,4),%a3; /* a3 ^= third table[tmp] */
- // original Gladman had conditional saves to MMX regs.
- // Here the temporaries live in 32-bit slots addressed from %esp.
- // save(a1, a2): store register a2 into stack slot number a1
- #define save(a1, a2) \
- mov %a2,4*a1(%esp)
- // restore(a1, a2): load register a1 from stack slot number a2
- // (note the argument order is the reverse of save's)
- #define restore(a1, a2) \
- mov 4*a2(%esp),%a1
- // These macros perform a forward encryption cycle. fwd_rnd1 is entered
- // with the previous round column values in r0,r1,r4,r5 and exits with the
- // new values in r2,r1,r4,r5; fwd_rnd2 is entered with r2,r1,r4,r5 and exits
- // with r0,r1,r4,r5, so an rnd1/rnd2 pair ends in the registers it started
- // in. Stack slots 0 and 1 (at %esp) are used for temporary storage and r3
- // is the scratch register.
- // Parameters: arg   round key operand, passed on to do_fcol as sched
- //             table lookup table base, passed on to do_fcol / do_col
- // round column values
- // on entry: r0,r1,r4,r5
- // on exit: r2,r1,r4,r5
- #define fwd_rnd1(arg, table) \
- save (0,r1); /* slot 0 = old r1, which do_fcol reloads from the key */ \
- save (1,r5); /* slot 1 = old r5, which do_fcol reloads from the key */ \
- \
- /* compute new column values */ \
- do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0; leaves the old r4 in r0 */ \
- do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
- restore(r0,0); /* r0 = old r1 */ \
- do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
- restore(r0,1); /* r0 = old r5 */ \
- do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
- // Forward round with the first column arriving in r2 instead of r0; the
- // body is fwd_rnd1 with the roles of r0 and r2 exchanged.
- // Parameters: arg   round key operand, passed on to do_fcol as sched
- //             table lookup table base, passed on to do_fcol / do_col
- // round column values
- // on entry: r2,r1,r4,r5
- // on exit: r0,r1,r4,r5
- #define fwd_rnd2(arg, table) \
- save (0,r1); /* slot 0 = old r1, which do_fcol reloads from the key */ \
- save (1,r5); /* slot 1 = old r5, which do_fcol reloads from the key */ \
- \
- /* compute new column values */ \
- do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2; leaves the old r4 in r2 */ \
- do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
- restore(r2,0); /* r2 = old r1 */ \
- do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
- restore(r2,1); /* r2 = old r5 */ \
- do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
- // These macros perform an inverse encryption cycle. inv_rnd1 is entered
- // with the previous round column values in r0,r1,r4,r5 and exits with the
- // new values in r2,r1,r4,r5; inv_rnd2 is entered with r2,r1,r4,r5 and exits
- // with r0,r1,r4,r5, so an rnd1/rnd2 pair ends in the registers it started
- // in. Stack slots 0 and 1 (at %esp) are used for temporary storage and r3
- // is the scratch register.
- // Parameters: arg   round key operand, passed on to do_icol as sched
- //             table lookup table base, passed on to do_icol / do_col
- // round column values
- // on entry: r0,r1,r4,r5
- // on exit: r2,r1,r4,r5
- #define inv_rnd1(arg, table) \
- save (0,r1); /* slot 0 = old r1, which do_icol reloads from the key */ \
- save (1,r5); /* slot 1 = old r5, which do_icol reloads from the key */ \
- \
- /* compute new column values */ \
- do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0; leaves the old r4 in r0 */ \
- do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
- restore(r0,0); /* r0 = old r1 */ \
- do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
- restore(r0,1); /* r0 = old r5 */ \
- do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
- // Inverse round with the first column arriving in r2 instead of r0; the
- // body is inv_rnd1 with the roles of r0 and r2 exchanged.
- // Parameters: arg   round key operand, passed on to do_icol as sched
- //             table lookup table base, passed on to do_icol / do_col
- // round column values
- // on entry: r2,r1,r4,r5
- // on exit: r0,r1,r4,r5
- #define inv_rnd2(arg, table) \
- save (0,r1); /* slot 0 = old r1, which do_icol reloads from the key */ \
- save (1,r5); /* slot 1 = old r5, which do_icol reloads from the key */ \
- \
- /* compute new column values */ \
- do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2; leaves the old r4 in r2 */ \
- do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
- restore(r2,0); /* r2 = old r1 */ \
- do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
- restore(r2,1); /* r2 = old r5 */ \
- do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
- // AES (Rijndael) Encryption Subroutine
- /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
- // Reads four 32-bit words from in_blk, xors in the first round key at
- // ekey in ctx, runs 10, 12 or 14 table-driven rounds depending on the key
- // length stored at klen in ctx, and writes four 32-bit words to out_blk.
- // All three arguments are read from the stack. ebp, ebx, esi and edi are
- // pushed on entry and popped before returning; eax, ecx and edx are
- // overwritten. 8 further bytes of stack are used for the round macros.
- .global aes_enc_blk
- .extern crypto_ft_tab // table for every round but the last (four tlen-byte tables back to back)
- .extern crypto_fl_tab // table for the last round, indexed the same way
- .align 4
- aes_enc_blk:
- push %ebp
- mov ctx(%esp),%ebp // ebp = ctx
- // CAUTION: the order and the values used in these assigns
- // rely on the register mappings
- 1: push %ebx
- mov in_blk+4(%esp),%r2 // r2 = in_blk; two registers pushed by now, hence +4
- push %esi
- mov klen(%ebp),%r3 // key size
- push %edi
- #if ekey != 0
- lea ekey(%ebp),%ebp // key pointer
- #endif
- // input four columns and xor in first round key
- mov (%r2),%r0
- mov 4(%r2),%r1
- mov 8(%r2),%r4
- mov 12(%r2),%r5
- xor (%ebp),%r0
- xor 4(%ebp),%r1
- xor 8(%ebp),%r4
- xor 12(%ebp),%r5
- sub $8,%esp // space for register saves on stack
- add $16,%ebp // increment to next round key
- cmp $24,%r3 // key size against 24 (the 192-bit case)
- jb 4f // 10 rounds for 128-bit key
- lea 32(%ebp),%ebp // step over two round keys; lea keeps the flags of the cmp
- je 3f // 12 rounds for 192-bit key
- lea 32(%ebp),%ebp // step over two more round keys
- // ebp was advanced so that the ten rounds from label 4 on always use
- // offsets 0..+144; the extra leading rounds use negative offsets
- 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
- fwd_rnd2( -48(%ebp), crypto_ft_tab)
- 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
- fwd_rnd2( -16(%ebp), crypto_ft_tab)
- 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
- fwd_rnd2( +16(%ebp), crypto_ft_tab)
- fwd_rnd1( +32(%ebp), crypto_ft_tab)
- fwd_rnd2( +48(%ebp), crypto_ft_tab)
- fwd_rnd1( +64(%ebp), crypto_ft_tab)
- fwd_rnd2( +80(%ebp), crypto_ft_tab)
- fwd_rnd1( +96(%ebp), crypto_ft_tab)
- fwd_rnd2(+112(%ebp), crypto_ft_tab)
- fwd_rnd1(+128(%ebp), crypto_ft_tab)
- fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
- // move final values to the output array. CAUTION: the
- // order of these assigns rely on the register mappings
- // (the last macro was fwd_rnd2, so the result is in r0,r1,r4,r5; r1, r4
- // and r5 are ebx, esi and edi and must be stored before being popped)
- add $8,%esp // drop the two save slots
- mov out_blk+12(%esp),%ebp // ebp = out_blk; four registers pushed, hence +12
- mov %r5,12(%ebp)
- pop %edi
- mov %r4,8(%ebp)
- pop %esi
- mov %r1,4(%ebp)
- pop %ebx
- mov %r0,(%ebp)
- pop %ebp
- ret
- // AES (Rijndael) Decryption Subroutine
- /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
- // Reads four 32-bit words from in_blk, xors in the first round key at
- // dkey in ctx, runs 10, 12 or 14 table-driven inverse rounds depending on
- // the key length stored at klen in ctx, and writes four 32-bit words to
- // out_blk. All three arguments are read from the stack. ebp, ebx, esi and
- // edi are pushed on entry and popped before returning; eax, ecx and edx
- // are overwritten. 8 further bytes of stack are used for the round macros.
- .global aes_dec_blk
- .extern crypto_it_tab // table for every round but the last (four tlen-byte tables back to back)
- .extern crypto_il_tab // table for the last round, indexed the same way
- .align 4
- aes_dec_blk:
- push %ebp
- mov ctx(%esp),%ebp // ebp = ctx
- // CAUTION: the order and the values used in these assigns
- // rely on the register mappings
- 1: push %ebx
- mov in_blk+4(%esp),%r2 // r2 = in_blk; two registers pushed by now, hence +4
- push %esi
- mov klen(%ebp),%r3 // key size (read before ebp is moved on to dkey)
- push %edi
- #if dkey != 0
- lea dkey(%ebp),%ebp // key pointer
- #endif
- 
- // input four columns and xor in first round key
- mov (%r2),%r0
- mov 4(%r2),%r1
- mov 8(%r2),%r4
- mov 12(%r2),%r5
- xor (%ebp),%r0
- xor 4(%ebp),%r1
- xor 8(%ebp),%r4
- xor 12(%ebp),%r5
- sub $8,%esp // space for register saves on stack
- add $16,%ebp // increment to next round key
- cmp $24,%r3 // key size against 24 (the 192-bit case)
- jb 4f // 10 rounds for 128-bit key
- lea 32(%ebp),%ebp // step over two round keys; lea keeps the flags of the cmp
- je 3f // 12 rounds for 192-bit key
- lea 32(%ebp),%ebp // step over two more round keys
- // ebp was advanced so that the ten rounds from label 4 on always use
- // offsets 0..+144; the extra leading rounds use negative offsets
- 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
- inv_rnd2( -48(%ebp), crypto_it_tab)
- 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
- inv_rnd2( -16(%ebp), crypto_it_tab)
- 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
- inv_rnd2( +16(%ebp), crypto_it_tab)
- inv_rnd1( +32(%ebp), crypto_it_tab)
- inv_rnd2( +48(%ebp), crypto_it_tab)
- inv_rnd1( +64(%ebp), crypto_it_tab)
- inv_rnd2( +80(%ebp), crypto_it_tab)
- inv_rnd1( +96(%ebp), crypto_it_tab)
- inv_rnd2(+112(%ebp), crypto_it_tab)
- inv_rnd1(+128(%ebp), crypto_it_tab)
- inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
- // move final values to the output array. CAUTION: the
- // order of these assigns rely on the register mappings
- // (the last macro was inv_rnd2, so the result is in r0,r1,r4,r5; r1, r4
- // and r5 are ebx, esi and edi and must be stored before being popped)
- add $8,%esp // drop the two save slots
- mov out_blk+12(%esp),%ebp // ebp = out_blk; four registers pushed, hence +12
- mov %r5,12(%ebp)
- pop %edi
- mov %r4,8(%ebp)
- pop %esi
- mov %r1,4(%ebp)
- pop %ebx
- mov %r0,(%ebp)
- pop %ebp
- ret
|