aes-spe-glue.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /*
  2. * Glue code for AES implementation for SPE instructions (PPC)
  3. *
  4. * Based on generic implementation. The assembler module takes care
  5. * of the SPE registers so it can run from interrupt context.
  6. *
  7. * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
  8. *
  9. * This program is free software; you can redistribute it and/or modify it
  10. * under the terms of the GNU General Public License as published by the Free
  11. * Software Foundation; either version 2 of the License, or (at your option)
  12. * any later version.
  13. *
  14. */
  15. #include <crypto/aes.h>
  16. #include <linux/module.h>
  17. #include <linux/init.h>
  18. #include <linux/types.h>
  19. #include <linux/errno.h>
  20. #include <linux/crypto.h>
  21. #include <asm/byteorder.h>
  22. #include <asm/switch_to.h>
  23. #include <crypto/algapi.h>
  24. #include <crypto/xts.h>
  25. /*
  26. * MAX_BYTES defines the number of bytes that are allowed to be processed
  27. * between preempt_disable() and preempt_enable(). e500 cores can issue two
  28. * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
  29. * bit unit (SU2). One of these can be a memory access that is executed via
  30. * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
  31. * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
  32. * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
  33. * included. Even with the low end model clocked at 667 MHz this equals a
  34. * critical time window of less than 30us. The value has been chosen to
  35. * process a 512 byte disk block in one or a large 1400 bytes IPsec network
  36. * packet in two runs.
  37. *
  38. */
  39. #define MAX_BYTES 768
  40. struct ppc_aes_ctx {
  41. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  42. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  43. u32 rounds;
  44. };
  45. struct ppc_xts_ctx {
  46. u32 key_enc[AES_MAX_KEYLENGTH_U32];
  47. u32 key_dec[AES_MAX_KEYLENGTH_U32];
  48. u32 key_twk[AES_MAX_KEYLENGTH_U32];
  49. u32 rounds;
  50. };
  51. extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
  52. extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
  53. extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  54. u32 bytes);
  55. extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  56. u32 bytes);
  57. extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  58. u32 bytes, u8 *iv);
  59. extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  60. u32 bytes, u8 *iv);
  61. extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  62. u32 bytes, u8 *iv);
  63. extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
  64. u32 bytes, u8 *iv, u32 *key_twk);
  65. extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
  66. u32 bytes, u8 *iv, u32 *key_twk);
  67. extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
  68. extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
  69. extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
  70. extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
  71. unsigned int key_len);
  72. static void spe_begin(void)
  73. {
  74. /* disable preemption and save users SPE registers if required */
  75. preempt_disable();
  76. enable_kernel_spe();
  77. }
  78. static void spe_end(void)
  79. {
  80. disable_kernel_spe();
  81. /* reenable preemption */
  82. preempt_enable();
  83. }
  84. static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
  85. unsigned int key_len)
  86. {
  87. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  88. if (key_len != AES_KEYSIZE_128 &&
  89. key_len != AES_KEYSIZE_192 &&
  90. key_len != AES_KEYSIZE_256) {
  91. tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
  92. return -EINVAL;
  93. }
  94. switch (key_len) {
  95. case AES_KEYSIZE_128:
  96. ctx->rounds = 4;
  97. ppc_expand_key_128(ctx->key_enc, in_key);
  98. break;
  99. case AES_KEYSIZE_192:
  100. ctx->rounds = 5;
  101. ppc_expand_key_192(ctx->key_enc, in_key);
  102. break;
  103. case AES_KEYSIZE_256:
  104. ctx->rounds = 6;
  105. ppc_expand_key_256(ctx->key_enc, in_key);
  106. break;
  107. }
  108. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  109. return 0;
  110. }
  111. static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
  112. unsigned int key_len)
  113. {
  114. struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
  115. int err;
  116. err = xts_check_key(tfm, in_key, key_len);
  117. if (err)
  118. return err;
  119. key_len >>= 1;
  120. if (key_len != AES_KEYSIZE_128 &&
  121. key_len != AES_KEYSIZE_192 &&
  122. key_len != AES_KEYSIZE_256) {
  123. tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
  124. return -EINVAL;
  125. }
  126. switch (key_len) {
  127. case AES_KEYSIZE_128:
  128. ctx->rounds = 4;
  129. ppc_expand_key_128(ctx->key_enc, in_key);
  130. ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
  131. break;
  132. case AES_KEYSIZE_192:
  133. ctx->rounds = 5;
  134. ppc_expand_key_192(ctx->key_enc, in_key);
  135. ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
  136. break;
  137. case AES_KEYSIZE_256:
  138. ctx->rounds = 6;
  139. ppc_expand_key_256(ctx->key_enc, in_key);
  140. ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
  141. break;
  142. }
  143. ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
  144. return 0;
  145. }
  146. static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  147. {
  148. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  149. spe_begin();
  150. ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
  151. spe_end();
  152. }
  153. static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
  154. {
  155. struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
  156. spe_begin();
  157. ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
  158. spe_end();
  159. }
  160. static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  161. struct scatterlist *src, unsigned int nbytes)
  162. {
  163. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  164. struct blkcipher_walk walk;
  165. unsigned int ubytes;
  166. int err;
  167. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  168. blkcipher_walk_init(&walk, dst, src, nbytes);
  169. err = blkcipher_walk_virt(desc, &walk);
  170. while ((nbytes = walk.nbytes)) {
  171. ubytes = nbytes > MAX_BYTES ?
  172. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  173. nbytes -= ubytes;
  174. spe_begin();
  175. ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  176. ctx->key_enc, ctx->rounds, nbytes);
  177. spe_end();
  178. err = blkcipher_walk_done(desc, &walk, ubytes);
  179. }
  180. return err;
  181. }
  182. static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  183. struct scatterlist *src, unsigned int nbytes)
  184. {
  185. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  186. struct blkcipher_walk walk;
  187. unsigned int ubytes;
  188. int err;
  189. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  190. blkcipher_walk_init(&walk, dst, src, nbytes);
  191. err = blkcipher_walk_virt(desc, &walk);
  192. while ((nbytes = walk.nbytes)) {
  193. ubytes = nbytes > MAX_BYTES ?
  194. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  195. nbytes -= ubytes;
  196. spe_begin();
  197. ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
  198. ctx->key_dec, ctx->rounds, nbytes);
  199. spe_end();
  200. err = blkcipher_walk_done(desc, &walk, ubytes);
  201. }
  202. return err;
  203. }
  204. static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  205. struct scatterlist *src, unsigned int nbytes)
  206. {
  207. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  208. struct blkcipher_walk walk;
  209. unsigned int ubytes;
  210. int err;
  211. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  212. blkcipher_walk_init(&walk, dst, src, nbytes);
  213. err = blkcipher_walk_virt(desc, &walk);
  214. while ((nbytes = walk.nbytes)) {
  215. ubytes = nbytes > MAX_BYTES ?
  216. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  217. nbytes -= ubytes;
  218. spe_begin();
  219. ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  220. ctx->key_enc, ctx->rounds, nbytes, walk.iv);
  221. spe_end();
  222. err = blkcipher_walk_done(desc, &walk, ubytes);
  223. }
  224. return err;
  225. }
  226. static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  227. struct scatterlist *src, unsigned int nbytes)
  228. {
  229. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  230. struct blkcipher_walk walk;
  231. unsigned int ubytes;
  232. int err;
  233. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  234. blkcipher_walk_init(&walk, dst, src, nbytes);
  235. err = blkcipher_walk_virt(desc, &walk);
  236. while ((nbytes = walk.nbytes)) {
  237. ubytes = nbytes > MAX_BYTES ?
  238. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  239. nbytes -= ubytes;
  240. spe_begin();
  241. ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
  242. ctx->key_dec, ctx->rounds, nbytes, walk.iv);
  243. spe_end();
  244. err = blkcipher_walk_done(desc, &walk, ubytes);
  245. }
  246. return err;
  247. }
  248. static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  249. struct scatterlist *src, unsigned int nbytes)
  250. {
  251. struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  252. struct blkcipher_walk walk;
  253. unsigned int pbytes, ubytes;
  254. int err;
  255. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  256. blkcipher_walk_init(&walk, dst, src, nbytes);
  257. err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
  258. while ((pbytes = walk.nbytes)) {
  259. pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
  260. pbytes = pbytes == nbytes ?
  261. nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
  262. ubytes = walk.nbytes - pbytes;
  263. spe_begin();
  264. ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
  265. ctx->key_enc, ctx->rounds, pbytes , walk.iv);
  266. spe_end();
  267. nbytes -= pbytes;
  268. err = blkcipher_walk_done(desc, &walk, ubytes);
  269. }
  270. return err;
  271. }
  272. static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  273. struct scatterlist *src, unsigned int nbytes)
  274. {
  275. struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  276. struct blkcipher_walk walk;
  277. unsigned int ubytes;
  278. int err;
  279. u32 *twk;
  280. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  281. blkcipher_walk_init(&walk, dst, src, nbytes);
  282. err = blkcipher_walk_virt(desc, &walk);
  283. twk = ctx->key_twk;
  284. while ((nbytes = walk.nbytes)) {
  285. ubytes = nbytes > MAX_BYTES ?
  286. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  287. nbytes -= ubytes;
  288. spe_begin();
  289. ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  290. ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
  291. spe_end();
  292. twk = NULL;
  293. err = blkcipher_walk_done(desc, &walk, ubytes);
  294. }
  295. return err;
  296. }
  297. static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
  298. struct scatterlist *src, unsigned int nbytes)
  299. {
  300. struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
  301. struct blkcipher_walk walk;
  302. unsigned int ubytes;
  303. int err;
  304. u32 *twk;
  305. desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
  306. blkcipher_walk_init(&walk, dst, src, nbytes);
  307. err = blkcipher_walk_virt(desc, &walk);
  308. twk = ctx->key_twk;
  309. while ((nbytes = walk.nbytes)) {
  310. ubytes = nbytes > MAX_BYTES ?
  311. nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
  312. nbytes -= ubytes;
  313. spe_begin();
  314. ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
  315. ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
  316. spe_end();
  317. twk = NULL;
  318. err = blkcipher_walk_done(desc, &walk, ubytes);
  319. }
  320. return err;
  321. }
  322. /*
  323. * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
  324. * because the e500 platform can handle unaligned reads/writes very efficiently.
  325. * This improves IPsec throughput by another few percent. Additionally we assume
  326. * that AES context is always aligned to at least 8 bytes because it is created
  327. * with kmalloc() in the crypto infrastructure
  328. *
  329. */
  330. static struct crypto_alg aes_algs[] = { {
  331. .cra_name = "aes",
  332. .cra_driver_name = "aes-ppc-spe",
  333. .cra_priority = 300,
  334. .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
  335. .cra_blocksize = AES_BLOCK_SIZE,
  336. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  337. .cra_alignmask = 0,
  338. .cra_module = THIS_MODULE,
  339. .cra_u = {
  340. .cipher = {
  341. .cia_min_keysize = AES_MIN_KEY_SIZE,
  342. .cia_max_keysize = AES_MAX_KEY_SIZE,
  343. .cia_setkey = ppc_aes_setkey,
  344. .cia_encrypt = ppc_aes_encrypt,
  345. .cia_decrypt = ppc_aes_decrypt
  346. }
  347. }
  348. }, {
  349. .cra_name = "ecb(aes)",
  350. .cra_driver_name = "ecb-ppc-spe",
  351. .cra_priority = 300,
  352. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  353. .cra_blocksize = AES_BLOCK_SIZE,
  354. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  355. .cra_alignmask = 0,
  356. .cra_type = &crypto_blkcipher_type,
  357. .cra_module = THIS_MODULE,
  358. .cra_u = {
  359. .blkcipher = {
  360. .min_keysize = AES_MIN_KEY_SIZE,
  361. .max_keysize = AES_MAX_KEY_SIZE,
  362. .ivsize = AES_BLOCK_SIZE,
  363. .setkey = ppc_aes_setkey,
  364. .encrypt = ppc_ecb_encrypt,
  365. .decrypt = ppc_ecb_decrypt,
  366. }
  367. }
  368. }, {
  369. .cra_name = "cbc(aes)",
  370. .cra_driver_name = "cbc-ppc-spe",
  371. .cra_priority = 300,
  372. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  373. .cra_blocksize = AES_BLOCK_SIZE,
  374. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  375. .cra_alignmask = 0,
  376. .cra_type = &crypto_blkcipher_type,
  377. .cra_module = THIS_MODULE,
  378. .cra_u = {
  379. .blkcipher = {
  380. .min_keysize = AES_MIN_KEY_SIZE,
  381. .max_keysize = AES_MAX_KEY_SIZE,
  382. .ivsize = AES_BLOCK_SIZE,
  383. .setkey = ppc_aes_setkey,
  384. .encrypt = ppc_cbc_encrypt,
  385. .decrypt = ppc_cbc_decrypt,
  386. }
  387. }
  388. }, {
  389. .cra_name = "ctr(aes)",
  390. .cra_driver_name = "ctr-ppc-spe",
  391. .cra_priority = 300,
  392. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  393. .cra_blocksize = 1,
  394. .cra_ctxsize = sizeof(struct ppc_aes_ctx),
  395. .cra_alignmask = 0,
  396. .cra_type = &crypto_blkcipher_type,
  397. .cra_module = THIS_MODULE,
  398. .cra_u = {
  399. .blkcipher = {
  400. .min_keysize = AES_MIN_KEY_SIZE,
  401. .max_keysize = AES_MAX_KEY_SIZE,
  402. .ivsize = AES_BLOCK_SIZE,
  403. .setkey = ppc_aes_setkey,
  404. .encrypt = ppc_ctr_crypt,
  405. .decrypt = ppc_ctr_crypt,
  406. }
  407. }
  408. }, {
  409. .cra_name = "xts(aes)",
  410. .cra_driver_name = "xts-ppc-spe",
  411. .cra_priority = 300,
  412. .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
  413. .cra_blocksize = AES_BLOCK_SIZE,
  414. .cra_ctxsize = sizeof(struct ppc_xts_ctx),
  415. .cra_alignmask = 0,
  416. .cra_type = &crypto_blkcipher_type,
  417. .cra_module = THIS_MODULE,
  418. .cra_u = {
  419. .blkcipher = {
  420. .min_keysize = AES_MIN_KEY_SIZE * 2,
  421. .max_keysize = AES_MAX_KEY_SIZE * 2,
  422. .ivsize = AES_BLOCK_SIZE,
  423. .setkey = ppc_xts_setkey,
  424. .encrypt = ppc_xts_encrypt,
  425. .decrypt = ppc_xts_decrypt,
  426. }
  427. }
  428. } };
  429. static int __init ppc_aes_mod_init(void)
  430. {
  431. return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
  432. }
  433. static void __exit ppc_aes_mod_fini(void)
  434. {
  435. crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
  436. }
  437. module_init(ppc_aes_mod_init);
  438. module_exit(ppc_aes_mod_fini);
  439. MODULE_LICENSE("GPL");
  440. MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
  441. MODULE_ALIAS_CRYPTO("aes");
  442. MODULE_ALIAS_CRYPTO("ecb(aes)");
  443. MODULE_ALIAS_CRYPTO("cbc(aes)");
  444. MODULE_ALIAS_CRYPTO("ctr(aes)");
  445. MODULE_ALIAS_CRYPTO("xts(aes)");
  446. MODULE_ALIAS_CRYPTO("aes-ppc-spe");