/* crypto_aes_aesni.c -- AES block encryption using x86 AESNI intrinsics. */
  1. #include "cpusupport.h"
  2. #ifdef CPUSUPPORT_X86_AESNI
  3. #include <stdint.h>
  4. #include <stdlib.h>
  5. #include <wmmintrin.h>
  6. #include "insecure_memzero.h"
  7. #include "warnp.h"
  8. #include "crypto_aes_aesni.h"
/* Expanded-key structure. */
struct crypto_aes_key_aesni {
	/*
	 * Storage for up to 15 round keys (the AES-256 maximum) plus
	 * sizeof(__m128i) - 1 bytes of slack, so that a 16-byte-aligned
	 * position can always be found inside the buffer; malloc does not
	 * guarantee __m128i alignment.
	 */
	uint8_t rkeys_buf[15 * sizeof(__m128i) + (sizeof(__m128i) - 1)];
	__m128i * rkeys;	/* Aligned pointer into rkeys_buf. */
	size_t nr;		/* Number of AES rounds: 10 (AES-128) or 14 (AES-256). */
};
/*
 * Compute an AES-128 round key.  rkeys[i] is derived from rkeys[i - 1]:
 * the two slli/xor steps accumulate the running xor of the previous round
 * key's 32-bit words, while aeskeygenassist computes rotword+subword+rcon
 * and the 0xff shuffle broadcasts its high word to all four lanes.
 */
#define MKRKEY128(rkeys, i, rcon) do {				\
	__m128i _s = rkeys[i - 1];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, 0xff);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_128_aesni(key, rkeys):
 * Expand the 128-bit AES key ${key} into the 11 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_128_aesni(const uint8_t key[16], __m128i rkeys[11])
{

	/* The first round key is just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * round key: rotword+subword+rcon (provided as aeskeygenassist) to
	 * compute the 'temp' value, then xor with 1, 2, 3, or all 4 of the
	 * 32-bit words from the preceding round key.  Unfortunately, 'rcon'
	 * is encoded as an immediate value, so we need to write the loop out
	 * ourselves rather than allowing the compiler to expand it.
	 *
	 * The rcon values are successive powers of x in GF(2^8) modulo the
	 * AES polynomial (0x80 * 2 wraps to 0x1b), per the FIPS-197 key
	 * schedule.
	 */
	MKRKEY128(rkeys, 1, 0x01);
	MKRKEY128(rkeys, 2, 0x02);
	MKRKEY128(rkeys, 3, 0x04);
	MKRKEY128(rkeys, 4, 0x08);
	MKRKEY128(rkeys, 5, 0x10);
	MKRKEY128(rkeys, 6, 0x20);
	MKRKEY128(rkeys, 7, 0x40);
	MKRKEY128(rkeys, 8, 0x80);
	MKRKEY128(rkeys, 9, 0x1b);
	MKRKEY128(rkeys, 10, 0x36);
}
/*
 * Compute an AES-256 round key.  rkeys[i] is derived from rkeys[i - 2]
 * (the round key two back) and rkeys[i - 1] (fed through aeskeygenassist);
 * the 'shuffle' immediate selects which aeskeygenassist output word to
 * broadcast: 0xff (rotword+subword+rcon) for even rounds, 0xaa (subword
 * only) for odd rounds.
 */
#define MKRKEY256(rkeys, i, shuffle, rcon) do {			\
	__m128i _s = rkeys[i - 2];				\
	__m128i _t = rkeys[i - 1];				\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 4));		\
	_s = _mm_xor_si128(_s, _mm_slli_si128(_s, 8));		\
	_t = _mm_aeskeygenassist_si128(_t, rcon);		\
	_t = _mm_shuffle_epi32(_t, shuffle);			\
	rkeys[i] = _mm_xor_si128(_s, _t);			\
} while (0)
/**
 * crypto_aes_key_expand_256_aesni(key, rkeys):
 * Expand the 256-bit AES key ${key} into the 15 round keys ${rkeys}.  This
 * implementation uses x86 AESNI instructions, and should only be used if
 * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
 */
static void
crypto_aes_key_expand_256_aesni(const uint8_t key[32], __m128i rkeys[15])
{

	/* The first two round keys are just the key. */
	/**
	 * XXX Compiler breakage:
	 * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
	 * taking a (const __m128i *) parameter.  This forces us to write a
	 * bug: The cast to (const __m128i *) is invalid since it increases
	 * the alignment requirement of the pointer.  Alas, until compilers
	 * get fixed intrinsics, all we can do is code the bug and require
	 * that alignment-requirement-increasing compiler warnings get
	 * disabled.
	 */
	rkeys[0] = _mm_loadu_si128((const __m128i *)&key[0]);
	rkeys[1] = _mm_loadu_si128((const __m128i *)&key[16]);

	/*
	 * Each of the remaining round keys are computed from the preceding
	 * pair of keys.  Even rounds use rotword+subword+rcon, while odd
	 * rounds just use subword; the aeskeygenassist instruction computes
	 * both, and we use 0xff or 0xaa to select the one we need.  The rcon
	 * value used is irrelevant for odd rounds since we ignore the value
	 * which it feeds into.  Unfortunately, the 'shuffle' and 'rcon'
	 * values are encoded into the instructions as immediates, so we need
	 * to write the loop out ourselves rather than allowing the compiler
	 * to expand it.
	 */
	MKRKEY256(rkeys, 2, 0xff, 0x01);
	MKRKEY256(rkeys, 3, 0xaa, 0x00);
	MKRKEY256(rkeys, 4, 0xff, 0x02);
	MKRKEY256(rkeys, 5, 0xaa, 0x00);
	MKRKEY256(rkeys, 6, 0xff, 0x04);
	MKRKEY256(rkeys, 7, 0xaa, 0x00);
	MKRKEY256(rkeys, 8, 0xff, 0x08);
	MKRKEY256(rkeys, 9, 0xaa, 0x00);
	MKRKEY256(rkeys, 10, 0xff, 0x10);
	MKRKEY256(rkeys, 11, 0xaa, 0x00);
	MKRKEY256(rkeys, 12, 0xff, 0x20);
	MKRKEY256(rkeys, 13, 0xaa, 0x00);
	MKRKEY256(rkeys, 14, 0xff, 0x40);
}
  122. /**
  123. * crypto_aes_key_expand_aesni(key, len):
  124. * Expand the ${len}-byte AES key ${key} into a structure which can be passed
  125. * to crypto_aes_encrypt_block_aesni. The length must be 16 or 32. This
  126. * implementation uses x86 AESNI instructions, and should only be used if
  127. * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
  128. */
  129. void *
  130. crypto_aes_key_expand_aesni(const uint8_t * key, size_t len)
  131. {
  132. struct crypto_aes_key_aesni * kexp;
  133. size_t rkey_offset;
  134. /* Allocate structure. */
  135. if ((kexp = malloc(sizeof(struct crypto_aes_key_aesni))) == NULL)
  136. goto err0;
  137. /* Figure out where to put the round keys. */
  138. rkey_offset = (uintptr_t)(&kexp->rkeys_buf[0]) % sizeof(__m128i);
  139. rkey_offset = (sizeof(__m128i) - rkey_offset) % sizeof(__m128i);
  140. kexp->rkeys = (void *)&kexp->rkeys_buf[rkey_offset];
  141. /* Compute round keys. */
  142. if (len == 16) {
  143. kexp->nr = 10;
  144. crypto_aes_key_expand_128_aesni(key, kexp->rkeys);
  145. } else if (len == 32) {
  146. kexp->nr = 14;
  147. crypto_aes_key_expand_256_aesni(key, kexp->rkeys);
  148. } else {
  149. warn0("Unsupported AES key length: %zu bytes", len);
  150. goto err1;
  151. }
  152. /* Success! */
  153. return (kexp);
  154. err1:
  155. free(kexp);
  156. err0:
  157. /* Failure! */
  158. return (NULL);
  159. }
  160. /**
  161. * crypto_aes_encrypt_block_aesni(in, out, key):
  162. * Using the expanded AES key ${key}, encrypt the block ${in} and write the
  163. * resulting ciphertext to ${out}. This implementation uses x86 AESNI
  164. * instructions, and should only be used if CPUSUPPORT_X86_AESNI is defined
  165. * and cpusupport_x86_aesni() returns nonzero.
  166. */
  167. void
  168. crypto_aes_encrypt_block_aesni(const uint8_t * in, uint8_t * out,
  169. const void * key)
  170. {
  171. const struct crypto_aes_key_aesni * _key = key;
  172. const __m128i * aes_key = _key->rkeys;
  173. __m128i aes_state;
  174. size_t nr = _key->nr;
  175. aes_state = _mm_loadu_si128((const __m128i *)in);
  176. aes_state = _mm_xor_si128(aes_state, aes_key[0]);
  177. aes_state = _mm_aesenc_si128(aes_state, aes_key[1]);
  178. aes_state = _mm_aesenc_si128(aes_state, aes_key[2]);
  179. aes_state = _mm_aesenc_si128(aes_state, aes_key[3]);
  180. aes_state = _mm_aesenc_si128(aes_state, aes_key[4]);
  181. aes_state = _mm_aesenc_si128(aes_state, aes_key[5]);
  182. aes_state = _mm_aesenc_si128(aes_state, aes_key[6]);
  183. aes_state = _mm_aesenc_si128(aes_state, aes_key[7]);
  184. aes_state = _mm_aesenc_si128(aes_state, aes_key[8]);
  185. aes_state = _mm_aesenc_si128(aes_state, aes_key[9]);
  186. if (nr > 10) {
  187. aes_state = _mm_aesenc_si128(aes_state, aes_key[10]);
  188. aes_state = _mm_aesenc_si128(aes_state, aes_key[11]);
  189. if (nr > 12) {
  190. aes_state = _mm_aesenc_si128(aes_state, aes_key[12]);
  191. aes_state = _mm_aesenc_si128(aes_state, aes_key[13]);
  192. }
  193. }
  194. aes_state = _mm_aesenclast_si128(aes_state, aes_key[nr]);
  195. _mm_storeu_si128((__m128i *)out, aes_state);
  196. }
  197. /**
  198. * crypto_aes_key_free_aesni(key):
  199. * Free the expanded AES key ${key}.
  200. */
  201. void
  202. crypto_aes_key_free_aesni(void * key)
  203. {
  204. /* Behave consistently with free(NULL). */
  205. if (key == NULL)
  206. return;
  207. /* Attempt to zero the expanded key. */
  208. insecure_memzero(key, sizeof(struct crypto_aes_key_aesni));
  209. /* Free the key. */
  210. free(key);
  211. }
  212. #endif /* CPUSUPPORT_X86_AESNI */