padlock-sha.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. /*
  2. * Cryptographic API.
  3. *
  4. * Support for VIA PadLock hardware crypto engine.
  5. *
  6. * Copyright (c) 2006 Michal Ludvig <michal@logix.cz>
  7. *
  8. * This program is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version.
  12. *
  13. */
  14. #include <crypto/internal/hash.h>
  15. #include <crypto/padlock.h>
  16. #include <crypto/sha.h>
  17. #include <linux/err.h>
  18. #include <linux/module.h>
  19. #include <linux/init.h>
  20. #include <linux/errno.h>
  21. #include <linux/interrupt.h>
  22. #include <linux/kernel.h>
  23. #include <linux/scatterlist.h>
  24. #include <asm/cpu_device_id.h>
  25. #include <asm/fpu/api.h>
  26. struct padlock_sha_desc {
  27. struct shash_desc fallback;
  28. };
  29. struct padlock_sha_ctx {
  30. struct crypto_shash *fallback;
  31. };
  32. static int padlock_sha_init(struct shash_desc *desc)
  33. {
  34. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  35. struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  36. dctx->fallback.tfm = ctx->fallback;
  37. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  38. return crypto_shash_init(&dctx->fallback);
  39. }
  40. static int padlock_sha_update(struct shash_desc *desc,
  41. const u8 *data, unsigned int length)
  42. {
  43. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  44. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  45. return crypto_shash_update(&dctx->fallback, data, length);
  46. }
  47. static int padlock_sha_export(struct shash_desc *desc, void *out)
  48. {
  49. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  50. return crypto_shash_export(&dctx->fallback, out);
  51. }
  52. static int padlock_sha_import(struct shash_desc *desc, const void *in)
  53. {
  54. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  55. struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
  56. dctx->fallback.tfm = ctx->fallback;
  57. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  58. return crypto_shash_import(&dctx->fallback, in);
  59. }
  60. static inline void padlock_output_block(uint32_t *src,
  61. uint32_t *dst, size_t count)
  62. {
  63. while (count--)
  64. *dst++ = swab32(*src++);
  65. }
  66. static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
  67. unsigned int count, u8 *out)
  68. {
  69. /* We can't store directly to *out as it may be unaligned. */
  70. /* BTW Don't reduce the buffer size below 128 Bytes!
  71. * PadLock microcode needs it that big. */
  72. char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  73. ((aligned(STACK_ALIGN)));
  74. char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  75. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  76. struct sha1_state state;
  77. unsigned int space;
  78. unsigned int leftover;
  79. int err;
  80. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  81. err = crypto_shash_export(&dctx->fallback, &state);
  82. if (err)
  83. goto out;
  84. if (state.count + count > ULONG_MAX)
  85. return crypto_shash_finup(&dctx->fallback, in, count, out);
  86. leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
  87. space = SHA1_BLOCK_SIZE - leftover;
  88. if (space) {
  89. if (count > space) {
  90. err = crypto_shash_update(&dctx->fallback, in, space) ?:
  91. crypto_shash_export(&dctx->fallback, &state);
  92. if (err)
  93. goto out;
  94. count -= space;
  95. in += space;
  96. } else {
  97. memcpy(state.buffer + leftover, in, count);
  98. in = state.buffer;
  99. count += leftover;
  100. state.count &= ~(SHA1_BLOCK_SIZE - 1);
  101. }
  102. }
  103. memcpy(result, &state.state, SHA1_DIGEST_SIZE);
  104. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
  105. : \
  106. : "c"((unsigned long)state.count + count), \
  107. "a"((unsigned long)state.count), \
  108. "S"(in), "D"(result));
  109. padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
  110. out:
  111. return err;
  112. }
  113. static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
  114. {
  115. u8 buf[4];
  116. return padlock_sha1_finup(desc, buf, 0, out);
  117. }
  118. static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
  119. unsigned int count, u8 *out)
  120. {
  121. /* We can't store directly to *out as it may be unaligned. */
  122. /* BTW Don't reduce the buffer size below 128 Bytes!
  123. * PadLock microcode needs it that big. */
  124. char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  125. ((aligned(STACK_ALIGN)));
  126. char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  127. struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
  128. struct sha256_state state;
  129. unsigned int space;
  130. unsigned int leftover;
  131. int err;
  132. dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
  133. err = crypto_shash_export(&dctx->fallback, &state);
  134. if (err)
  135. goto out;
  136. if (state.count + count > ULONG_MAX)
  137. return crypto_shash_finup(&dctx->fallback, in, count, out);
  138. leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
  139. space = SHA256_BLOCK_SIZE - leftover;
  140. if (space) {
  141. if (count > space) {
  142. err = crypto_shash_update(&dctx->fallback, in, space) ?:
  143. crypto_shash_export(&dctx->fallback, &state);
  144. if (err)
  145. goto out;
  146. count -= space;
  147. in += space;
  148. } else {
  149. memcpy(state.buf + leftover, in, count);
  150. in = state.buf;
  151. count += leftover;
  152. state.count &= ~(SHA1_BLOCK_SIZE - 1);
  153. }
  154. }
  155. memcpy(result, &state.state, SHA256_DIGEST_SIZE);
  156. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
  157. : \
  158. : "c"((unsigned long)state.count + count), \
  159. "a"((unsigned long)state.count), \
  160. "S"(in), "D"(result));
  161. padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
  162. out:
  163. return err;
  164. }
  165. static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
  166. {
  167. u8 buf[4];
  168. return padlock_sha256_finup(desc, buf, 0, out);
  169. }
  170. static int padlock_cra_init(struct crypto_tfm *tfm)
  171. {
  172. struct crypto_shash *hash = __crypto_shash_cast(tfm);
  173. const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
  174. struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
  175. struct crypto_shash *fallback_tfm;
  176. int err = -ENOMEM;
  177. /* Allocate a fallback and abort if it failed. */
  178. fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
  179. CRYPTO_ALG_NEED_FALLBACK);
  180. if (IS_ERR(fallback_tfm)) {
  181. printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
  182. fallback_driver_name);
  183. err = PTR_ERR(fallback_tfm);
  184. goto out;
  185. }
  186. ctx->fallback = fallback_tfm;
  187. hash->descsize += crypto_shash_descsize(fallback_tfm);
  188. return 0;
  189. out:
  190. return err;
  191. }
  192. static void padlock_cra_exit(struct crypto_tfm *tfm)
  193. {
  194. struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
  195. crypto_free_shash(ctx->fallback);
  196. }
  197. static struct shash_alg sha1_alg = {
  198. .digestsize = SHA1_DIGEST_SIZE,
  199. .init = padlock_sha_init,
  200. .update = padlock_sha_update,
  201. .finup = padlock_sha1_finup,
  202. .final = padlock_sha1_final,
  203. .export = padlock_sha_export,
  204. .import = padlock_sha_import,
  205. .descsize = sizeof(struct padlock_sha_desc),
  206. .statesize = sizeof(struct sha1_state),
  207. .base = {
  208. .cra_name = "sha1",
  209. .cra_driver_name = "sha1-padlock",
  210. .cra_priority = PADLOCK_CRA_PRIORITY,
  211. .cra_flags = CRYPTO_ALG_TYPE_SHASH |
  212. CRYPTO_ALG_NEED_FALLBACK,
  213. .cra_blocksize = SHA1_BLOCK_SIZE,
  214. .cra_ctxsize = sizeof(struct padlock_sha_ctx),
  215. .cra_module = THIS_MODULE,
  216. .cra_init = padlock_cra_init,
  217. .cra_exit = padlock_cra_exit,
  218. }
  219. };
  220. static struct shash_alg sha256_alg = {
  221. .digestsize = SHA256_DIGEST_SIZE,
  222. .init = padlock_sha_init,
  223. .update = padlock_sha_update,
  224. .finup = padlock_sha256_finup,
  225. .final = padlock_sha256_final,
  226. .export = padlock_sha_export,
  227. .import = padlock_sha_import,
  228. .descsize = sizeof(struct padlock_sha_desc),
  229. .statesize = sizeof(struct sha256_state),
  230. .base = {
  231. .cra_name = "sha256",
  232. .cra_driver_name = "sha256-padlock",
  233. .cra_priority = PADLOCK_CRA_PRIORITY,
  234. .cra_flags = CRYPTO_ALG_TYPE_SHASH |
  235. CRYPTO_ALG_NEED_FALLBACK,
  236. .cra_blocksize = SHA256_BLOCK_SIZE,
  237. .cra_ctxsize = sizeof(struct padlock_sha_ctx),
  238. .cra_module = THIS_MODULE,
  239. .cra_init = padlock_cra_init,
  240. .cra_exit = padlock_cra_exit,
  241. }
  242. };
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
  245. static int padlock_sha1_init_nano(struct shash_desc *desc)
  246. {
  247. struct sha1_state *sctx = shash_desc_ctx(desc);
  248. *sctx = (struct sha1_state){
  249. .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
  250. };
  251. return 0;
  252. }
  253. static int padlock_sha1_update_nano(struct shash_desc *desc,
  254. const u8 *data, unsigned int len)
  255. {
  256. struct sha1_state *sctx = shash_desc_ctx(desc);
  257. unsigned int partial, done;
  258. const u8 *src;
  259. /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
  260. u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  261. ((aligned(STACK_ALIGN)));
  262. u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  263. partial = sctx->count & 0x3f;
  264. sctx->count += len;
  265. done = 0;
  266. src = data;
  267. memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
  268. if ((partial + len) >= SHA1_BLOCK_SIZE) {
  269. /* Append the bytes in state's buffer to a block to handle */
  270. if (partial) {
  271. done = -partial;
  272. memcpy(sctx->buffer + partial, data,
  273. done + SHA1_BLOCK_SIZE);
  274. src = sctx->buffer;
  275. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
  276. : "+S"(src), "+D"(dst) \
  277. : "a"((long)-1), "c"((unsigned long)1));
  278. done += SHA1_BLOCK_SIZE;
  279. src = data + done;
  280. }
  281. /* Process the left bytes from the input data */
  282. if (len - done >= SHA1_BLOCK_SIZE) {
  283. asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
  284. : "+S"(src), "+D"(dst)
  285. : "a"((long)-1),
  286. "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
  287. done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
  288. src = data + done;
  289. }
  290. partial = 0;
  291. }
  292. memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
  293. memcpy(sctx->buffer + partial, src, len - done);
  294. return 0;
  295. }
  296. static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
  297. {
  298. struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
  299. unsigned int partial, padlen;
  300. __be64 bits;
  301. static const u8 padding[64] = { 0x80, };
  302. bits = cpu_to_be64(state->count << 3);
  303. /* Pad out to 56 mod 64 */
  304. partial = state->count & 0x3f;
  305. padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
  306. padlock_sha1_update_nano(desc, padding, padlen);
  307. /* Append length field bytes */
  308. padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
  309. /* Swap to output */
  310. padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
  311. return 0;
  312. }
  313. static int padlock_sha256_init_nano(struct shash_desc *desc)
  314. {
  315. struct sha256_state *sctx = shash_desc_ctx(desc);
  316. *sctx = (struct sha256_state){
  317. .state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
  318. SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
  319. };
  320. return 0;
  321. }
  322. static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
  323. unsigned int len)
  324. {
  325. struct sha256_state *sctx = shash_desc_ctx(desc);
  326. unsigned int partial, done;
  327. const u8 *src;
  328. /*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
  329. u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
  330. ((aligned(STACK_ALIGN)));
  331. u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
  332. partial = sctx->count & 0x3f;
  333. sctx->count += len;
  334. done = 0;
  335. src = data;
  336. memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
  337. if ((partial + len) >= SHA256_BLOCK_SIZE) {
  338. /* Append the bytes in state's buffer to a block to handle */
  339. if (partial) {
  340. done = -partial;
  341. memcpy(sctx->buf + partial, data,
  342. done + SHA256_BLOCK_SIZE);
  343. src = sctx->buf;
  344. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
  345. : "+S"(src), "+D"(dst)
  346. : "a"((long)-1), "c"((unsigned long)1));
  347. done += SHA256_BLOCK_SIZE;
  348. src = data + done;
  349. }
  350. /* Process the left bytes from input data*/
  351. if (len - done >= SHA256_BLOCK_SIZE) {
  352. asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
  353. : "+S"(src), "+D"(dst)
  354. : "a"((long)-1),
  355. "c"((unsigned long)((len - done) / 64)));
  356. done += ((len - done) - (len - done) % 64);
  357. src = data + done;
  358. }
  359. partial = 0;
  360. }
  361. memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
  362. memcpy(sctx->buf + partial, src, len - done);
  363. return 0;
  364. }
  365. static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
  366. {
  367. struct sha256_state *state =
  368. (struct sha256_state *)shash_desc_ctx(desc);
  369. unsigned int partial, padlen;
  370. __be64 bits;
  371. static const u8 padding[64] = { 0x80, };
  372. bits = cpu_to_be64(state->count << 3);
  373. /* Pad out to 56 mod 64 */
  374. partial = state->count & 0x3f;
  375. padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
  376. padlock_sha256_update_nano(desc, padding, padlen);
  377. /* Append length field bytes */
  378. padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
  379. /* Swap to output */
  380. padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
  381. return 0;
  382. }
  383. static int padlock_sha_export_nano(struct shash_desc *desc,
  384. void *out)
  385. {
  386. int statesize = crypto_shash_statesize(desc->tfm);
  387. void *sctx = shash_desc_ctx(desc);
  388. memcpy(out, sctx, statesize);
  389. return 0;
  390. }
  391. static int padlock_sha_import_nano(struct shash_desc *desc,
  392. const void *in)
  393. {
  394. int statesize = crypto_shash_statesize(desc->tfm);
  395. void *sctx = shash_desc_ctx(desc);
  396. memcpy(sctx, in, statesize);
  397. return 0;
  398. }
  399. static struct shash_alg sha1_alg_nano = {
  400. .digestsize = SHA1_DIGEST_SIZE,
  401. .init = padlock_sha1_init_nano,
  402. .update = padlock_sha1_update_nano,
  403. .final = padlock_sha1_final_nano,
  404. .export = padlock_sha_export_nano,
  405. .import = padlock_sha_import_nano,
  406. .descsize = sizeof(struct sha1_state),
  407. .statesize = sizeof(struct sha1_state),
  408. .base = {
  409. .cra_name = "sha1",
  410. .cra_driver_name = "sha1-padlock-nano",
  411. .cra_priority = PADLOCK_CRA_PRIORITY,
  412. .cra_flags = CRYPTO_ALG_TYPE_SHASH,
  413. .cra_blocksize = SHA1_BLOCK_SIZE,
  414. .cra_module = THIS_MODULE,
  415. }
  416. };
  417. static struct shash_alg sha256_alg_nano = {
  418. .digestsize = SHA256_DIGEST_SIZE,
  419. .init = padlock_sha256_init_nano,
  420. .update = padlock_sha256_update_nano,
  421. .final = padlock_sha256_final_nano,
  422. .export = padlock_sha_export_nano,
  423. .import = padlock_sha_import_nano,
  424. .descsize = sizeof(struct sha256_state),
  425. .statesize = sizeof(struct sha256_state),
  426. .base = {
  427. .cra_name = "sha256",
  428. .cra_driver_name = "sha256-padlock-nano",
  429. .cra_priority = PADLOCK_CRA_PRIORITY,
  430. .cra_flags = CRYPTO_ALG_TYPE_SHASH,
  431. .cra_blocksize = SHA256_BLOCK_SIZE,
  432. .cra_module = THIS_MODULE,
  433. }
  434. };
  435. static struct x86_cpu_id padlock_sha_ids[] = {
  436. X86_FEATURE_MATCH(X86_FEATURE_PHE),
  437. {}
  438. };
  439. MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
  440. static int __init padlock_init(void)
  441. {
  442. int rc = -ENODEV;
  443. struct cpuinfo_x86 *c = &cpu_data(0);
  444. struct shash_alg *sha1;
  445. struct shash_alg *sha256;
  446. if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
  447. return -ENODEV;
  448. /* Register the newly added algorithm module if on *
  449. * VIA Nano processor, or else just do as before */
  450. if (c->x86_model < 0x0f) {
  451. sha1 = &sha1_alg;
  452. sha256 = &sha256_alg;
  453. } else {
  454. sha1 = &sha1_alg_nano;
  455. sha256 = &sha256_alg_nano;
  456. }
  457. rc = crypto_register_shash(sha1);
  458. if (rc)
  459. goto out;
  460. rc = crypto_register_shash(sha256);
  461. if (rc)
  462. goto out_unreg1;
  463. printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
  464. return 0;
  465. out_unreg1:
  466. crypto_unregister_shash(sha1);
  467. out:
  468. printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
  469. return rc;
  470. }
  471. static void __exit padlock_fini(void)
  472. {
  473. struct cpuinfo_x86 *c = &cpu_data(0);
  474. if (c->x86_model >= 0x0f) {
  475. crypto_unregister_shash(&sha1_alg_nano);
  476. crypto_unregister_shash(&sha256_alg_nano);
  477. } else {
  478. crypto_unregister_shash(&sha1_alg);
  479. crypto_unregister_shash(&sha256_alg);
  480. }
  481. }
  482. module_init(padlock_init);
  483. module_exit(padlock_fini);
  484. MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
  485. MODULE_LICENSE("GPL");
  486. MODULE_AUTHOR("Michal Ludvig");
  487. MODULE_ALIAS_CRYPTO("sha1-all");
  488. MODULE_ALIAS_CRYPTO("sha256-all");
  489. MODULE_ALIAS_CRYPTO("sha1-padlock");
  490. MODULE_ALIAS_CRYPTO("sha256-padlock");