crypto_aes_aesni.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. #include "cpusupport.h"
  2. #ifdef CPUSUPPORT_X86_AESNI
  3. /**
  4. * CPUSUPPORT CFLAGS: X86_AESNI
  5. */
  6. #include <stdint.h>
  7. #include <stdlib.h>
  8. #include <wmmintrin.h>
  9. #include "align_ptr.h"
  10. #include "insecure_memzero.h"
  11. #include "warnp.h"
  12. #include "crypto_aes_aesni.h"
  13. #include "crypto_aes_aesni_m128i.h"
  14. /* Expanded-key structure. */
  15. struct crypto_aes_key_aesni {
  16. ALIGN_PTR_DECL(__m128i, rkeys, 15, sizeof(__m128i));
  17. size_t nr;
  18. };
  /*
   * MKRKEY128(rkeys, i, rcon):
   * Compute AES-128 round key ${i} from round key ${i} - 1 and store it in
   * ${rkeys}[${i}].  ${i} must be at least 1.
   *
   * The previous round key is xored with copies of itself shifted left by 4
   * and then 8 bytes, so that 32-bit word k of the result is the xor of words
   * 0..k of the previous key.  That is then xored with the top 32-bit word of
   * the _mm_aeskeygenassist_si128 output (the rotword+subword+rcon value),
   * which the 0xff shuffle broadcasts to all four words.
   *
   * ${rcon} is passed to _mm_aeskeygenassist_si128 as an immediate, so it
   * must be a compile-time constant.  ${rkeys} and ${i} are each expanded
   * more than once; do not pass expressions with side effects.  All macro
   * parameters are parenthesized so that expression arguments associate as
   * the caller intended.
   */
  #define MKRKEY128(rkeys, i, rcon) do { \
      __m128i _prev = (rkeys)[(i) - 1]; \
      __m128i _assist = (rkeys)[(i) - 1]; \
      _prev = _mm_xor_si128(_prev, _mm_slli_si128(_prev, 4)); \
      _prev = _mm_xor_si128(_prev, _mm_slli_si128(_prev, 8)); \
      _assist = _mm_aeskeygenassist_si128(_assist, (rcon)); \
      _assist = _mm_shuffle_epi32(_assist, 0xff); \
      (rkeys)[(i)] = _mm_xor_si128(_prev, _assist); \
  } while (0)
  29. /**
  30. * crypto_aes_key_expand_128_aesni(key_unexpanded, rkeys):
  31. * Expand the 128-bit AES unexpanded key ${key_unexpanded} into the 11 round
  32. * keys ${rkeys}. This implementation uses x86 AESNI instructions, and should
  33. * only be used if CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni()
  34. * returns nonzero.
  35. */
  36. static void
  37. crypto_aes_key_expand_128_aesni(const uint8_t key_unexpanded[16],
  38. __m128i rkeys[11])
  39. {
  40. /* The first round key is just the key. */
  41. /*-
  42. * XXX Compiler breakage:
  43. * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
  44. * taking a (const __m128i *) parameter. This forces us to write a
  45. * bug: The cast to (const __m128i *) is invalid since it increases
  46. * the alignment requirement of the pointer. Alas, until compilers
  47. * get fixed intrinsics, all we can do is code the bug and require
  48. * that alignment-requirement-increasing compiler warnings get
  49. * disabled.
  50. */
  51. rkeys[0] = _mm_loadu_si128((const __m128i *)&key_unexpanded[0]);
  52. /*
  53. * Each of the remaining round keys are computed from the preceding
  54. * round key: rotword+subword+rcon (provided as aeskeygenassist) to
  55. * compute the 'temp' value, then xor with 1, 2, 3, or all 4 of the
  56. * 32-bit words from the preceding round key. Unfortunately, 'rcon'
  57. * is encoded as an immediate value, so we need to write the loop out
  58. * ourselves rather than allowing the compiler to expand it.
  59. */
  60. MKRKEY128(rkeys, 1, 0x01);
  61. MKRKEY128(rkeys, 2, 0x02);
  62. MKRKEY128(rkeys, 3, 0x04);
  63. MKRKEY128(rkeys, 4, 0x08);
  64. MKRKEY128(rkeys, 5, 0x10);
  65. MKRKEY128(rkeys, 6, 0x20);
  66. MKRKEY128(rkeys, 7, 0x40);
  67. MKRKEY128(rkeys, 8, 0x80);
  68. MKRKEY128(rkeys, 9, 0x1b);
  69. MKRKEY128(rkeys, 10, 0x36);
  70. }
  /*
   * MKRKEY256(rkeys, i, shuffle, rcon):
   * Compute AES-256 round key ${i} from round keys ${i} - 2 and ${i} - 1 and
   * store it in ${rkeys}[${i}].  ${i} must be at least 2.
   *
   * Round key ${i} - 2 is xored with copies of itself shifted left by 4 and
   * then 8 bytes, so that 32-bit word k of the result is the xor of words
   * 0..k of that key.  Round key ${i} - 1 is fed to
   * _mm_aeskeygenassist_si128, and ${shuffle} selects which 32-bit word of
   * its output is broadcast and xored in: 0xff for the rotword+subword+rcon
   * value, 0xaa for the subword-only value.
   *
   * ${shuffle} and ${rcon} are passed to the intrinsics as immediates, so
   * they must be compile-time constants.  ${rkeys} and ${i} are each
   * expanded more than once; do not pass expressions with side effects.
   * All macro parameters are parenthesized so that expression arguments
   * associate as the caller intended.
   */
  #define MKRKEY256(rkeys, i, shuffle, rcon) do { \
      __m128i _prev = (rkeys)[(i) - 2]; \
      __m128i _assist = (rkeys)[(i) - 1]; \
      _prev = _mm_xor_si128(_prev, _mm_slli_si128(_prev, 4)); \
      _prev = _mm_xor_si128(_prev, _mm_slli_si128(_prev, 8)); \
      _assist = _mm_aeskeygenassist_si128(_assist, (rcon)); \
      _assist = _mm_shuffle_epi32(_assist, (shuffle)); \
      (rkeys)[(i)] = _mm_xor_si128(_prev, _assist); \
  } while (0)
  81. /**
  82. * crypto_aes_key_expand_256_aesni(key_unexpanded, rkeys):
  83. * Expand the 256-bit unexpanded AES key ${key_unexpanded} into the 15 round
  84. * keys ${rkeys}. This implementation uses x86 AESNI instructions, and should
  85. * only be used if CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni()
  86. * returns nonzero.
  87. */
  88. static void
  89. crypto_aes_key_expand_256_aesni(const uint8_t key_unexpanded[32],
  90. __m128i rkeys[15])
  91. {
  92. /* The first two round keys are just the key. */
  93. /*-
  94. * XXX Compiler breakage:
  95. * The intrinsic defined by Intel for _mm_loadu_si128 defines it as
  96. * taking a (const __m128i *) parameter. This forces us to write a
  97. * bug: The cast to (const __m128i *) is invalid since it increases
  98. * the alignment requirement of the pointer. Alas, until compilers
  99. * get fixed intrinsics, all we can do is code the bug and require
  100. * that alignment-requirement-increasing compiler warnings get
  101. * disabled.
  102. */
  103. rkeys[0] = _mm_loadu_si128((const __m128i *)&key_unexpanded[0]);
  104. rkeys[1] = _mm_loadu_si128((const __m128i *)&key_unexpanded[16]);
  105. /*
  106. * Each of the remaining round keys are computed from the preceding
  107. * pair of keys. Even rounds use rotword+subword+rcon, while odd
  108. * rounds just use subword; the aeskeygenassist instruction computes
  109. * both, and we use 0xff or 0xaa to select the one we need. The rcon
  110. * value used is irrelevant for odd rounds since we ignore the value
  111. * which it feeds into. Unfortunately, the 'shuffle' and 'rcon'
  112. * values are encoded into the instructions as immediates, so we need
  113. * to write the loop out ourselves rather than allowing the compiler
  114. * to expand it.
  115. */
  116. MKRKEY256(rkeys, 2, 0xff, 0x01);
  117. MKRKEY256(rkeys, 3, 0xaa, 0x00);
  118. MKRKEY256(rkeys, 4, 0xff, 0x02);
  119. MKRKEY256(rkeys, 5, 0xaa, 0x00);
  120. MKRKEY256(rkeys, 6, 0xff, 0x04);
  121. MKRKEY256(rkeys, 7, 0xaa, 0x00);
  122. MKRKEY256(rkeys, 8, 0xff, 0x08);
  123. MKRKEY256(rkeys, 9, 0xaa, 0x00);
  124. MKRKEY256(rkeys, 10, 0xff, 0x10);
  125. MKRKEY256(rkeys, 11, 0xaa, 0x00);
  126. MKRKEY256(rkeys, 12, 0xff, 0x20);
  127. MKRKEY256(rkeys, 13, 0xaa, 0x00);
  128. MKRKEY256(rkeys, 14, 0xff, 0x40);
  129. }
  130. /**
  131. * crypto_aes_key_expand_aesni(key_unexpanded, len):
  132. * Expand the ${len}-byte unexpanded AES key ${key_unexpanded} into a
  133. * structure which can be passed to crypto_aes_encrypt_block_aesni(). The
  134. * length must be 16 or 32. This implementation uses x86 AESNI instructions,
  135. * and should only be used if CPUSUPPORT_X86_AESNI is defined and
  136. * cpusupport_x86_aesni() returns nonzero.
  137. */
  138. void *
  139. crypto_aes_key_expand_aesni(const uint8_t * key_unexpanded, size_t len)
  140. {
  141. struct crypto_aes_key_aesni * kexp;
  142. /* Allocate structure. */
  143. if ((kexp = malloc(sizeof(struct crypto_aes_key_aesni))) == NULL)
  144. goto err0;
  145. /* Figure out where to put the round keys. */
  146. ALIGN_PTR_INIT(kexp->rkeys, sizeof(__m128i));
  147. /* Compute round keys. */
  148. if (len == 16) {
  149. kexp->nr = 10;
  150. crypto_aes_key_expand_128_aesni(key_unexpanded, kexp->rkeys);
  151. } else if (len == 32) {
  152. kexp->nr = 14;
  153. crypto_aes_key_expand_256_aesni(key_unexpanded, kexp->rkeys);
  154. } else {
  155. warn0("Unsupported AES key length: %zu bytes", len);
  156. goto err1;
  157. }
  158. /* Success! */
  159. return (kexp);
  160. err1:
  161. free(kexp);
  162. err0:
  163. /* Failure! */
  164. return (NULL);
  165. }
  166. /**
  167. * crypto_aes_encrypt_block_aesni_m128i(in, key):
  168. * Using the expanded AES key ${key}, encrypt the block ${in} and return the
  169. * resulting ciphertext. This implementation uses x86 AESNI instructions,
  170. * and should only be used if CPUSUPPORT_X86_AESNI is defined and
  171. * cpusupport_x86_aesni() returns nonzero.
  172. */
  173. __m128i
  174. crypto_aes_encrypt_block_aesni_m128i(__m128i in, const void * key)
  175. {
  176. const struct crypto_aes_key_aesni * _key = key;
  177. const __m128i * aes_key = _key->rkeys;
  178. __m128i aes_state = in;
  179. size_t nr = _key->nr;
  180. aes_state = _mm_xor_si128(aes_state, aes_key[0]);
  181. aes_state = _mm_aesenc_si128(aes_state, aes_key[1]);
  182. aes_state = _mm_aesenc_si128(aes_state, aes_key[2]);
  183. aes_state = _mm_aesenc_si128(aes_state, aes_key[3]);
  184. aes_state = _mm_aesenc_si128(aes_state, aes_key[4]);
  185. aes_state = _mm_aesenc_si128(aes_state, aes_key[5]);
  186. aes_state = _mm_aesenc_si128(aes_state, aes_key[6]);
  187. aes_state = _mm_aesenc_si128(aes_state, aes_key[7]);
  188. aes_state = _mm_aesenc_si128(aes_state, aes_key[8]);
  189. aes_state = _mm_aesenc_si128(aes_state, aes_key[9]);
  190. if (nr > 10) {
  191. aes_state = _mm_aesenc_si128(aes_state, aes_key[10]);
  192. aes_state = _mm_aesenc_si128(aes_state, aes_key[11]);
  193. aes_state = _mm_aesenc_si128(aes_state, aes_key[12]);
  194. aes_state = _mm_aesenc_si128(aes_state, aes_key[13]);
  195. }
  196. aes_state = _mm_aesenclast_si128(aes_state, aes_key[nr]);
  197. return (aes_state);
  198. }
  199. /**
  200. * crypto_aes_encrypt_block_aesni(in, out, key):
  201. * Using the expanded AES key ${key}, encrypt the block ${in} and write the
  202. * resulting ciphertext to ${out}. ${in} and ${out} can overlap. This
  203. * implementation uses x86 AESNI instructions, and should only be used if
  204. * CPUSUPPORT_X86_AESNI is defined and cpusupport_x86_aesni() returns nonzero.
  205. */
  206. void
  207. crypto_aes_encrypt_block_aesni(const uint8_t in[16], uint8_t out[16],
  208. const void * key)
  209. {
  210. __m128i aes_state;
  211. aes_state = _mm_loadu_si128((const __m128i *)in);
  212. aes_state = crypto_aes_encrypt_block_aesni_m128i(aes_state, key);
  213. _mm_storeu_si128((__m128i *)out, aes_state);
  214. }
  215. /**
  216. * crypto_aes_key_free_aesni(key):
  217. * Free the expanded AES key ${key}.
  218. */
  219. void
  220. crypto_aes_key_free_aesni(void * key)
  221. {
  222. /* Behave consistently with free(NULL). */
  223. if (key == NULL)
  224. return;
  225. /* Attempt to zero the expanded key. */
  226. insecure_memzero(key, sizeof(struct crypto_aes_key_aesni));
  227. /* Free the key. */
  228. free(key);
  229. }
  230. #endif /* CPUSUPPORT_X86_AESNI */