123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- #include "cpusupport.h"
- #ifdef CPUSUPPORT_X86_AESNI
- /**
- * CPUSUPPORT CFLAGS: X86_AESNI
- */
- #include <stdint.h>
- #include <stdlib.h>
- #include <wmmintrin.h>
- #include "align_ptr.h"
- #include "insecure_memzero.h"
- #include "warnp.h"
- #include "crypto_aes_aesni.h"
- #include "crypto_aes_aesni_m128i.h"
/* Expanded-key structure. */
struct crypto_aes_key_aesni {
	/*
	 * Round keys.  15 slots is enough for AES-256 (initial key plus 14
	 * round keys); AES-128 uses only the first 11.  ALIGN_PTR_DECL
	 * presumably declares backing storage plus a pointer ${rkeys}
	 * aligned to sizeof(__m128i) = 16 bytes -- see align_ptr.h.
	 */
	ALIGN_PTR_DECL(__m128i, rkeys, 15, sizeof(__m128i));
	/* Number of AES rounds: 10 for AES-128, 14 for AES-256. */
	size_t nr;
};
/*
 * Compute an AES-128 round key: rkeys[i] is derived from rkeys[i - 1].
 * _s becomes the running xor of the previous round key's 32-bit words
 * (byte-shifts by 4 and 8 xor each word into all the words above it);
 * _t is aeskeygenassist's RotWord(SubWord(...)) ^ rcon result, with the
 * 0xff shuffle broadcasting the high word into all four lanes.  The new
 * round key is _s ^ _t, matching the FIPS-197 key schedule.
 */
#define MKRKEY128(rkeys, i, rcon) do {				\
	__m128i _s = rkeys[i - 1];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, 0xff);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_128_aesni(key_unexpanded, rkeys):
 * Expand the 128-bit AES unexpanded key ${key_unexpanded} into the 11 round
 * keys ${rkeys}.  This implementation uses x86 AESNI instructions, and should
 * only be used if CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni()
 * returns nonzero.
 */
static void
crypto_aes_key_expand_128_aesni(const uint8_t key_unexpanded[16],
    __m128i rkeys[11])
{

	/* The first round key is just the key. */
	/*-
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key_unexpanded[0]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * round key: rotword+subword+rcon (provided as aeskeygenassist) to
	 * compute the 'temp' value, then xor with 1, 2, 3, or all 4 of the
	 * 32-bit words from the preceding round key.  Unfortunately, 'rcon'
	 * is encoded as an immediate value, so we need to write the loop out
	 * ourselves rather than allowing the compiler to expand it.
	 */
	/* Rcon values are successive powers of x in GF(2^8): 0x1b and 0x36
	 * are x^8 and x^9 reduced modulo the AES polynomial (FIPS-197). */
	MKRKEY128(rkeys, 1, 0x01);
	MKRKEY128(rkeys, 2, 0x02);
	MKRKEY128(rkeys, 3, 0x04);
	MKRKEY128(rkeys, 4, 0x08);
	MKRKEY128(rkeys, 5, 0x10);
	MKRKEY128(rkeys, 6, 0x20);
	MKRKEY128(rkeys, 7, 0x40);
	MKRKEY128(rkeys, 8, 0x80);
	MKRKEY128(rkeys, 9, 0x1b);
	MKRKEY128(rkeys, 10, 0x36);
}
/*
 * Compute an AES-256 round key: rkeys[i] is derived from rkeys[i - 2]
 * (xor-accumulated into _s, as in MKRKEY128) and rkeys[i - 1] (fed through
 * aeskeygenassist into _t).  The ${shuffle} immediate selects which
 * aeskeygenassist output word to broadcast: 0xff for the rotword+subword+
 * rcon variant (even rounds), 0xaa for subword only (odd rounds).
 */
#define MKRKEY256(rkeys, i, shuffle, rcon) do {			\
	__m128i _s = rkeys[i - 2];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, shuffle);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_256_aesni(key_unexpanded, rkeys):
 * Expand the 256-bit unexpanded AES key ${key_unexpanded} into the 15 round
 * keys ${rkeys}.  This implementation uses x86 AESNI instructions, and should
 * only be used if CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni()
 * returns nonzero.
 */
static void
crypto_aes_key_expand_256_aesni(const uint8_t key_unexpanded[32],
    __m128i rkeys[15])
{

	/* The first two round keys are just the key. */
	/*-
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key_unexpanded[0]);
	rkeys[1] = _mm_loadu_si128((const __m128i *)&key_unexpanded[16]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * pair of keys.  Even rounds use rotword+subword+rcon, while odd
	 * rounds just use subword; the aeskeygenassist instruction computes
	 * both, and we use 0xff or 0xaa to select the one we need.  The rcon
	 * value used is irrelevant for odd rounds since we ignore the value
	 * which it feeds into.  Unfortunately, the 'shuffle' and 'rcon'
	 * values are encoded into the instructions as immediates, so we need
	 * to write the loop out ourselves rather than allowing the compiler
	 * to expand it.
	 */
	MKRKEY256(rkeys, 2, 0xff, 0x01);
	MKRKEY256(rkeys, 3, 0xaa, 0x00);
	MKRKEY256(rkeys, 4, 0xff, 0x02);
	MKRKEY256(rkeys, 5, 0xaa, 0x00);
	MKRKEY256(rkeys, 6, 0xff, 0x04);
	MKRKEY256(rkeys, 7, 0xaa, 0x00);
	MKRKEY256(rkeys, 8, 0xff, 0x08);
	MKRKEY256(rkeys, 9, 0xaa, 0x00);
	MKRKEY256(rkeys, 10, 0xff, 0x10);
	MKRKEY256(rkeys, 11, 0xaa, 0x00);
	MKRKEY256(rkeys, 12, 0xff, 0x20);
	MKRKEY256(rkeys, 13, 0xaa, 0x00);
	MKRKEY256(rkeys, 14, 0xff, 0x40);
}
- /**
- * crypto_aes_key_expand_aesni(key_unexpanded, len):
- * Expand the ${len}-byte unexpanded AES key ${key_unexpanded} into a
- * structure which can be passed to crypto_aes_encrypt_block_aesni(). The
- * length must be 16 or 32. This implementation uses x86 AESNI instructions,
- * and should only be used if CPUSUPPORT_X86_AESNI is defined and
- * cpusupport_x86_aesni() returns nonzero.
- */
- void *
- crypto_aes_key_expand_aesni(const uint8_t * key_unexpanded, size_t len)
- {
- struct crypto_aes_key_aesni * kexp;
- /* Allocate structure. */
- if ((kexp = malloc(sizeof(struct crypto_aes_key_aesni))) == NULL)
- goto err0;
- /* Figure out where to put the round keys. */
- ALIGN_PTR_INIT(kexp->rkeys, sizeof(__m128i));
- /* Compute round keys. */
- if (len == 16) {
- kexp->nr = 10;
- crypto_aes_key_expand_128_aesni(key_unexpanded, kexp->rkeys);
- } else if (len == 32) {
- kexp->nr = 14;
- crypto_aes_key_expand_256_aesni(key_unexpanded, kexp->rkeys);
- } else {
- warn0("Unsupported AES key length: %zu bytes", len);
- goto err1;
- }
- /* Success! */
- return (kexp);
- err1:
- free(kexp);
- err0:
- /* Failure! */
- return (NULL);
- }
- /**
- * crypto_aes_encrypt_block_aesni_m128i(in, key):
- * Using the expanded AES key ${key}, encrypt the block ${in} and return the
- * resulting ciphertext. This implementation uses x86 AESNI instructions,
- * and should only be used if CPUSUPPORT_X86_AESNI is defined and
- * cpusupport_x86_aesni() returns nonzero.
- */
- __m128i
- crypto_aes_encrypt_block_aesni_m128i(__m128i in, const void * key)
- {
- const struct crypto_aes_key_aesni * _key = key;
- const __m128i * aes_key = _key->rkeys;
- __m128i aes_state = in;
- size_t nr = _key->nr;
- aes_state = _mm_xor_si128(aes_state, aes_key[0]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[1]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[2]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[3]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[4]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[5]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[6]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[7]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[8]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[9]);
- if (nr > 10) {
- aes_state = _mm_aesenc_si128(aes_state, aes_key[10]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[11]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[12]);
- aes_state = _mm_aesenc_si128(aes_state, aes_key[13]);
- }
- aes_state = _mm_aesenclast_si128(aes_state, aes_key[nr]);
- return (aes_state);
- }
- /**
- * crypto_aes_encrypt_block_aesni(in, out, key):
- * Using the expanded AES key ${key}, encrypt the block ${in} and write the
- * resulting ciphertext to ${out}. ${in} and ${out} can overlap. This
- * implementation uses x86 AESNI instructions, and should only be used if
- * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
- */
- void
- crypto_aes_encrypt_block_aesni(const uint8_t in[16], uint8_t out[16],
- const void * key)
- {
- __m128i aes_state;
- aes_state = _mm_loadu_si128((const __m128i *)in);
- aes_state = crypto_aes_encrypt_block_aesni_m128i(aes_state, key);
- _mm_storeu_si128((__m128i *)out, aes_state);
- }
- /**
- * crypto_aes_key_free_aesni(key):
- * Free the expanded AES key ${key}.
- */
- void
- crypto_aes_key_free_aesni(void * key)
- {
- /* Behave consistently with free(NULL). */
- if (key == NULL)
- return;
- /* Attempt to zero the expanded key. */
- insecure_memzero(key, sizeof(struct crypto_aes_key_aesni));
- /* Free the key. */
- free(key);
- }
- #endif /* CPUSUPPORT_X86_AESNI */
|