sha1-armv4-large.S

#define __ARM_ARCH__ __LINUX_ARM_ARCH__

@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ sha1_block procedure for ARMv4.
@
@ January 2007.

@ Size/performance trade-off
@ ====================================================================
@ impl           size in bytes   comp cycles[*]   measured performance
@ ====================================================================
@ thumb          304             3212             4420
@ armv4-small    392/+29%        1958/+64%        2250/+96%
@ armv4-compact  740/+89%        1552/+26%        1840/+22%
@ armv4-large    1420/+92%       1307/+19%        1370/+34%[***]
@ full unroll    ~5100/+260%     ~1260/+4%        ~1300/+5%
@ ====================================================================
@ thumb          = same as 'small' but in Thumb instructions[**] and
@                  with recurring code in two private functions;
@ small          = detached Xload/update, loops are folded;
@ compact        = detached Xload/update, 5x unroll;
@ large          = interleaved Xload/update, 5x unroll;
@ full unroll    = interleaved Xload/update, full unroll, estimated[!];
@
@ [*]   Manually counted instructions in "grand" loop body. Measured
@       performance is affected by prologue and epilogue overhead,
@       i-cache availability, branch penalties, etc.
@ [**]  While each Thumb instruction is half the size of an ARM one,
@       the Thumb set is not as diverse: e.g., there are only two
@       arithmetic instructions with 3 arguments, no [fixed] rotate,
@       and addressing modes are limited. As a result it takes more
@       instructions to do the same job in Thumb, so the code is never
@       half the size and is always slower.
@ [***] which is also ~35% better than compiler-generated code. A dual-
@       issue Cortex A8 core was measured to process an input block in
@       ~990 cycles.

@ August 2010.
@
@ Rescheduling for the dual-issue pipeline resulted in a 13% improvement
@ on a Cortex A8 core, or in absolute terms ~870 cycles per input block
@ [or 13.6 cycles per byte].

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in a 10%
@ improvement on a Cortex A8 core and 12.2 cycles per byte.
#include <linux/linkage.h>

.text
.align 2

ENTRY(sha1_block_data_order)
	stmdb sp!,{r4-r12,lr}
	add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
	ldmia r0,{r3,r4,r5,r6,r7}
.Lloop:
	ldr r8,.LK_00_19
	mov r14,sp
	sub sp,sp,#15*4
	mov r5,r5,ror#30
	mov r6,r6,ror#30
	mov r7,r7,ror#30 @ [6]
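@ Rounds 0..15: X[i] comes from the message, loaded byte by byte when
@ __ARM_ARCH__ < 7 and as a single word (plus "rev" on little-endian)
@ otherwise, then pushed onto the X[] window at r14. The and/eor pair
@ in each round computes the SHA-1 choice function
@ F_00_19(B,C,D) = (B&C)|(~B&D) in the equivalent form D^(B&(C^D));
@ the extra ror#2 operands appear to account for the rotated
@ representation of the working variables.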
.L_00_15:
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r6,r8,r6,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r4,r5 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r6,r8,r6,ror#2 @ E+=K_00_19
	eor r10,r4,r5 @ F_xx_xx
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r3,r10,ror#2
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r5,r8,r5,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r3,r4 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r5,r8,r5,ror#2 @ E+=K_00_19
	eor r10,r3,r4 @ F_xx_xx
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r7,r10,ror#2
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r4,r8,r4,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r7,r3 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r4,r8,r4,ror#2 @ E+=K_00_19
	eor r10,r7,r3 @ F_xx_xx
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r6,r10,ror#2
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r3,r8,r3,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r6,r7 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r3,r8,r3,ror#2 @ E+=K_00_19
	eor r10,r6,r7 @ F_xx_xx
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r5,r10,ror#2
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r3,r3,r10 @ E+=F_00_19(B,C,D)
	cmp r14,sp
	bne .L_00_15 @ [((11+4)*5+2)*3]
	sub sp,sp,#25*4
#if __ARM_ARCH__<7
	ldrb r10,[r1,#2]
	ldrb r9,[r1,#3]
	ldrb r11,[r1,#1]
	add r7,r8,r7,ror#2 @ E+=K_00_19
	ldrb r12,[r1],#4
	orr r9,r9,r10,lsl#8
	eor r10,r5,r6 @ F_xx_xx
	orr r9,r9,r11,lsl#16
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	orr r9,r9,r12,lsl#24
#else
	ldr r9,[r1],#4 @ handles unaligned
	add r7,r8,r7,ror#2 @ E+=K_00_19
	eor r10,r5,r6 @ F_xx_xx
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
#ifdef __ARMEL__
	rev r9,r9 @ byte swap
#endif
#endif
	and r10,r4,r10,ror#2
	add r7,r7,r9 @ E+=X[i]
	eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
	str r9,[r14,#-4]!
	add r7,r7,r10 @ E+=F_00_19(B,C,D)
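@ From round 16 onwards X[i] is no longer loaded from the message but
@ produced by the SHA-1 message schedule: reading the stack layout off
@ the "str r9,[r14,#-4]!" stores, the loads at offsets 15*4, 13*4, 7*4
@ and 2*4 from r14 fetch X[i-16], X[i-14], X[i-8] and X[i-3], and the
@ eor/ror#31 sequence computes
@ X[i] = ROL(X[i-3]^X[i-8]^X[i-14]^X[i-16],1),
@ which is then pushed back onto the sliding window.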
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
	add r6,r6,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
	add r5,r5,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
	add r4,r4,r10 @ E+=F_00_19(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
	add r3,r3,r10 @ E+=F_00_19(B,C,D)
	ldr r8,.LK_20_39 @ [+15+16*4]
	cmn sp,#0 @ [+3], clear carry to denote 20_39
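@ Rounds 20..39 and 60..79 share the same round function
@ F_20_39 = F_60_79 = B^C^D, so the same unrolled body is reused for
@ both ranges and the carry flag tells the two passes apart:
@ "cmn sp,#0" above clears it for 20..39, "cmp sp,#0" before the second
@ entry sets it for 60..79, and "bcs .L_done" exits after the second
@ pass (the "spare 300 bytes" remarks refer to this code sharing).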
.L_20_39_or_60_79:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r4,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r3,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r7,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r6,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_20_39(B,C,D)
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	eor r10,r5,r10,ror#2 @ F_xx_xx
	@ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_20_39(B,C,D)
 ARM(	teq r14,sp )		@ preserve carry
 THUMB(	mov r11,sp )
 THUMB(	teq r14,r11 )		@ preserve carry
	bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
	bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
	ldr r8,.LK_40_59
	sub sp,sp,#20*4 @ [+2]
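@ Rounds 40..59 use the majority function
@ F_40_59(B,C,D) = (B&C)|(B&D)|(C&D). The body below computes it as
@ (B&(C^D)) + (C&D); the two terms can never both be set in the same
@ bit position, so the addition is equivalent to the OR. That is why
@ each round here has two "and" instructions and a third
@ "add ...,r11,ror#2".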
.L_40_59:
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r7,r8,r7,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r5,r6 @ F_xx_xx
	mov r9,r9,ror#31
	add r7,r7,r3,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r4,r10,ror#2 @ F_xx_xx
	and r11,r5,r6 @ F_xx_xx
	add r7,r7,r9 @ E+=X[i]
	add r7,r7,r10 @ E+=F_40_59(B,C,D)
	add r7,r7,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r6,r8,r6,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r4,r5 @ F_xx_xx
	mov r9,r9,ror#31
	add r6,r6,r7,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r3,r10,ror#2 @ F_xx_xx
	and r11,r4,r5 @ F_xx_xx
	add r6,r6,r9 @ E+=X[i]
	add r6,r6,r10 @ E+=F_40_59(B,C,D)
	add r6,r6,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r5,r8,r5,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r3,r4 @ F_xx_xx
	mov r9,r9,ror#31
	add r5,r5,r6,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r7,r10,ror#2 @ F_xx_xx
	and r11,r3,r4 @ F_xx_xx
	add r5,r5,r9 @ E+=X[i]
	add r5,r5,r10 @ E+=F_40_59(B,C,D)
	add r5,r5,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r4,r8,r4,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r7,r3 @ F_xx_xx
	mov r9,r9,ror#31
	add r4,r4,r5,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r6,r10,ror#2 @ F_xx_xx
	and r11,r7,r3 @ F_xx_xx
	add r4,r4,r9 @ E+=X[i]
	add r4,r4,r10 @ E+=F_40_59(B,C,D)
	add r4,r4,r11,ror#2
	ldr r9,[r14,#15*4]
	ldr r10,[r14,#13*4]
	ldr r11,[r14,#7*4]
	add r3,r8,r3,ror#2 @ E+=K_xx_xx
	ldr r12,[r14,#2*4]
	eor r9,r9,r10
	eor r11,r11,r12 @ 1 cycle stall
	eor r10,r6,r7 @ F_xx_xx
	mov r9,r9,ror#31
	add r3,r3,r4,ror#27 @ E+=ROR(A,27)
	eor r9,r9,r11,ror#31
	str r9,[r14,#-4]!
	and r10,r5,r10,ror#2 @ F_xx_xx
	and r11,r6,r7 @ F_xx_xx
	add r3,r3,r9 @ E+=X[i]
	add r3,r3,r10 @ E+=F_40_59(B,C,D)
	add r3,r3,r11,ror#2
	cmp r14,sp
	bne .L_40_59 @ [+((12+5)*5+2)*4]
	ldr r8,.LK_60_79
	sub sp,sp,#20*4
	cmp sp,#0 @ set carry to denote 60_79
	b .L_20_39_or_60_79 @ [+4], spare 300 bytes
.L_done:
	add sp,sp,#80*4 @ "deallocate" stack frame
	ldmia r0,{r8,r9,r10,r11,r12}
	add r3,r8,r3
	add r4,r9,r4
	add r5,r10,r5,ror#2
	add r6,r11,r6,ror#2
	add r7,r12,r7,ror#2
	stmia r0,{r3,r4,r5,r6,r7}
	teq r1,r2
	bne .Lloop @ [+18], total 1307
	ldmia sp!,{r4-r12,pc}

.align 2
.LK_00_19: .word 0x5a827999
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
ENDPROC(sha1_block_data_order)
.asciz "SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
.align 2