sha1_ni_asm.S 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. /*
  2. * Intel SHA Extensions optimized implementation of a SHA-1 update function
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * Copyright(c) 2015 Intel Corporation.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of version 2 of the GNU General Public License as
  13. * published by the Free Software Foundation.
  14. *
  15. * This program is distributed in the hope that it will be useful, but
  16. * WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * General Public License for more details.
  19. *
  20. * Contact Information:
  21. * Sean Gulley <sean.m.gulley@intel.com>
  22. * Tim Chen <tim.c.chen@linux.intel.com>
  23. *
  24. * BSD LICENSE
  25. *
  26. * Copyright(c) 2015 Intel Corporation.
  27. *
  28. * Redistribution and use in source and binary forms, with or without
  29. * modification, are permitted provided that the following conditions
  30. * are met:
  31. *
  32. * * Redistributions of source code must retain the above copyright
  33. * notice, this list of conditions and the following disclaimer.
  34. * * Redistributions in binary form must reproduce the above copyright
  35. * notice, this list of conditions and the following disclaimer in
  36. * the documentation and/or other materials provided with the
  37. * distribution.
  38. * * Neither the name of Intel Corporation nor the names of its
  39. * contributors may be used to endorse or promote products derived
  40. * from this software without specific prior written permission.
  41. *
  42. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53. *
  54. */
  55. #include <linux/linkage.h>
  /*
   * Symbolic names for the registers and stack scratch space used by
   * sha1_ni_transform.
   */

  /* General-purpose registers: the three incoming arguments, in order */
  #define DIGEST_PTR      %rdi    /* argument 1 */
  #define DATA_PTR        %rsi    /* argument 2 */
  #define NUM_BLKS        %rdx    /* argument 3 */

  /* Receives a copy of the stack pointer taken on entry */
  #define RSPSAVE         %rax

  /* Bytes of stack scratch space: two 16-byte slots */
  #define FRAME_SIZE      32

  /* Vector registers: hash state */
  #define ABCD            %xmm0
  #define E0              %xmm1   /* E0 and E1 alternate ("ping pong") */
  #define E1              %xmm2

  /* Vector registers: four message-schedule words each */
  #define MSG0            %xmm3
  #define MSG1            %xmm4
  #define MSG2            %xmm5
  #define MSG3            %xmm6

  /* Vector register: byte-shuffle control applied to every loaded block */
  #define SHUF_MASK       %xmm7
  /*
   * sha1_ni_transform - SHA-1 update function using the Intel SHA Extensions
   *
   * void sha1_ni_transform(uint32_t *digest, const void *data,
   *                        uint32_t numBlocks)
   *
   * Syntax: GAS, AT&T operand order (source first, destination last).
   * ABI:    System V AMD64.
   * In:     %rdi = digest     five 32-bit hash words, updated in place
   *         %rsi = data       numBlocks * 64 bytes of input (loaded with
   *                           unaligned moves, so any alignment is accepted)
   *         %edx = numBlocks  number of complete 64-byte blocks
   * Out:    none in registers.  The digest is rewritten once, after the last
   *         block; it is left untouched when numBlocks is zero.
   * Clobb:  %rax, %rdx, %rsi, %xmm0-%xmm7, flags
   * Stack:  FRAME_SIZE bytes below the entry %rsp, rounded down to a 16-byte
   *         boundary; the entry %rsp is kept in RSPSAVE and restored on exit.
   *
   * Only complete blocks are processed; there is no functionality to store
   * partial blocks.  All message padding and hash value initialization must
   * be done outside the update function.
   *
   * Layout convention inside the loop: instructions indented one extra level
   * belong to rounds processing, the others to the message schedule.
   */
  .text
  .align 32
  ENTRY(sha1_ni_transform)
          mov         %rsp, RSPSAVE
          sub         $FRAME_SIZE, %rsp
          and         $~0xF, %rsp             /* movdqa/paddd/sha1nexte on the
                                                 frame need 16-byte alignment */

          /*
           * numBlocks is a 32-bit argument and the ABI leaves bits 63:32 of
           * its register unspecified.  Zero-extend it (NUM_BLKS is %rdx)
           * before the 64-bit arithmetic below, otherwise stale upper bits
           * would corrupt both the zero test and the end pointer.
           */
          mov         %edx, %edx
          shl         $6, NUM_BLKS            /* convert to bytes */
          jz          .Ldone_hash             /* ZF from shl: nothing to hash */
          add         DATA_PTR, NUM_BLKS      /* pointer to end of data */

          /*
           * Load initial hash values: the first four words go into ABCD in
           * reversed dword order (pshufd $0x1B), the fifth word into the top
           * dword of E0 with the remaining dwords cleared by the mask.
           */
          pinsrd      $3, 1*16(DIGEST_PTR), E0
          movdqu      0*16(DIGEST_PTR), ABCD
          pand        UPPER_WORD_MASK(%rip), E0
          pshufd      $0x1B, ABCD, ABCD

          movdqa      PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK

  .Lloop0:
          /* Save hash values for addition after rounds */
          movdqa      E0, (0*16)(%rsp)
          movdqa      ABCD, (1*16)(%rsp)

          /* Rounds 0-3 */
          movdqu      0*16(DATA_PTR), MSG0
          pshufb      SHUF_MASK, MSG0
                  paddd       MSG0, E0
                  movdqa      ABCD, E1
                  sha1rnds4   $0, E0, ABCD

          /* Rounds 4-7 */
          movdqu      1*16(DATA_PTR), MSG1
          pshufb      SHUF_MASK, MSG1
                  sha1nexte   MSG1, E1
                  movdqa      ABCD, E0
                  sha1rnds4   $0, E1, ABCD
          sha1msg1    MSG1, MSG0

          /* Rounds 8-11 */
          movdqu      2*16(DATA_PTR), MSG2
          pshufb      SHUF_MASK, MSG2
                  sha1nexte   MSG2, E0
                  movdqa      ABCD, E1
                  sha1rnds4   $0, E0, ABCD
          sha1msg1    MSG2, MSG1
          pxor        MSG2, MSG0

          /* Rounds 12-15 */
          movdqu      3*16(DATA_PTR), MSG3
          pshufb      SHUF_MASK, MSG3
                  sha1nexte   MSG3, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG3, MSG0
                  sha1rnds4   $0, E1, ABCD
          sha1msg1    MSG3, MSG2
          pxor        MSG3, MSG1

          /* Rounds 16-19 */
                  sha1nexte   MSG0, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG0, MSG1
                  sha1rnds4   $0, E0, ABCD
          sha1msg1    MSG0, MSG3
          pxor        MSG0, MSG2

          /* Rounds 20-23 (sha1rnds4 immediate switches to $1) */
                  sha1nexte   MSG1, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG1, MSG2
                  sha1rnds4   $1, E1, ABCD
          sha1msg1    MSG1, MSG0
          pxor        MSG1, MSG3

          /* Rounds 24-27 */
                  sha1nexte   MSG2, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG2, MSG3
                  sha1rnds4   $1, E0, ABCD
          sha1msg1    MSG2, MSG1
          pxor        MSG2, MSG0

          /* Rounds 28-31 */
                  sha1nexte   MSG3, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG3, MSG0
                  sha1rnds4   $1, E1, ABCD
          sha1msg1    MSG3, MSG2
          pxor        MSG3, MSG1

          /* Rounds 32-35 */
                  sha1nexte   MSG0, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG0, MSG1
                  sha1rnds4   $1, E0, ABCD
          sha1msg1    MSG0, MSG3
          pxor        MSG0, MSG2

          /* Rounds 36-39 */
                  sha1nexte   MSG1, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG1, MSG2
                  sha1rnds4   $1, E1, ABCD
          sha1msg1    MSG1, MSG0
          pxor        MSG1, MSG3

          /* Rounds 40-43 (sha1rnds4 immediate switches to $2) */
                  sha1nexte   MSG2, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG2, MSG3
                  sha1rnds4   $2, E0, ABCD
          sha1msg1    MSG2, MSG1
          pxor        MSG2, MSG0

          /* Rounds 44-47 */
                  sha1nexte   MSG3, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG3, MSG0
                  sha1rnds4   $2, E1, ABCD
          sha1msg1    MSG3, MSG2
          pxor        MSG3, MSG1

          /* Rounds 48-51 */
                  sha1nexte   MSG0, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG0, MSG1
                  sha1rnds4   $2, E0, ABCD
          sha1msg1    MSG0, MSG3
          pxor        MSG0, MSG2

          /* Rounds 52-55 */
                  sha1nexte   MSG1, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG1, MSG2
                  sha1rnds4   $2, E1, ABCD
          sha1msg1    MSG1, MSG0
          pxor        MSG1, MSG3

          /* Rounds 56-59 */
                  sha1nexte   MSG2, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG2, MSG3
                  sha1rnds4   $2, E0, ABCD
          sha1msg1    MSG2, MSG1
          pxor        MSG2, MSG0

          /* Rounds 60-63 (sha1rnds4 immediate switches to $3) */
                  sha1nexte   MSG3, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG3, MSG0
                  sha1rnds4   $3, E1, ABCD
          sha1msg1    MSG3, MSG2
          pxor        MSG3, MSG1

          /* Rounds 64-67 */
                  sha1nexte   MSG0, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG0, MSG1
                  sha1rnds4   $3, E0, ABCD
          sha1msg1    MSG0, MSG3
          pxor        MSG0, MSG2

          /* Rounds 68-71 */
                  sha1nexte   MSG1, E1
                  movdqa      ABCD, E0
          sha1msg2    MSG1, MSG2
                  sha1rnds4   $3, E1, ABCD
          pxor        MSG1, MSG3

          /* Rounds 72-75 */
                  sha1nexte   MSG2, E0
                  movdqa      ABCD, E1
          sha1msg2    MSG2, MSG3
                  sha1rnds4   $3, E0, ABCD

          /* Rounds 76-79 */
                  sha1nexte   MSG3, E1
                  movdqa      ABCD, E0
                  sha1rnds4   $3, E1, ABCD

          /* Add current hash values with previously saved */
          sha1nexte   (0*16)(%rsp), E0
          paddd       (1*16)(%rsp), ABCD

          /* Increment data pointer and loop if more to process */
          add         $64, DATA_PTR
          cmp         NUM_BLKS, DATA_PTR
          jne         .Lloop0

          /* Write hash values back in the correct order */
          pshufd      $0x1B, ABCD, ABCD       /* undo the dword reversal */
          movdqu      ABCD, 0*16(DIGEST_PTR)
          pextrd      $3, E0, 1*16(DIGEST_PTR) /* top dword of E0 = fifth word */

  .Ldone_hash:
          mov         RSPSAVE, %rsp           /* drop the realigned frame */

          ret
  ENDPROC(sha1_ni_transform)
  /*
   * Constants for sha1_ni_transform.  They are only ever read, so they live
   * in .rodata rather than in writable .data.  Both are used as aligned
   * 16-byte memory operands (movdqa / pand), so each must sit on a 16-byte
   * boundary; the 64-byte alignment of the first, followed by two
   * back-to-back 16-byte values, guarantees that.
   */
  .section .rodata, "a", @progbits
  .align 64
  /* pshufb control that reverses all 16 bytes of a vector */
  PSHUFFLE_BYTE_FLIP_MASK:
          .octa 0x000102030405060708090a0b0c0d0e0f

  /* keeps only the most significant dword of a vector */
  UPPER_WORD_MASK:
          .octa 0xFFFFFFFF000000000000000000000000