|
- // Copyright 2014 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- //
- // ARM version of sha1block.go
- #include "textflag.h"
- // SHA1 block routine. See sha1block.go for Go equivalent.
- //
- // There are 80 rounds of 4 types:
- // - rounds 0-15 are type 1 and load data (ROUND1 macro).
- // - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
- // - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
- // - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
- // - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
- //
- // Each round loads or shuffles the data, then computes a per-round
- // function of b, c, d, and then mixes the result into and rotates the
- // five registers a, b, c, d, e holding the intermediate results.
- //
- // The register rotation is implemented by rotating the arguments to
- // the round macros instead of by explicit move instructions.
- // Register definitions
- //
- // Symbolic names for the ARM registers used by the macros and by block().
- // Rt0, Rt1 and Rt2 are scratch registers clobbered by LOAD, SHUFFLE and the
- // FUNCn macros. Rctr and Rw are additionally reused as scratch when the
- // saved accumulators are reloaded at the end of each 64-byte block.
- #define Rdata R0 // Pointer to incoming data; advanced 4 bytes by every LOAD
- #define Rconst R1 // Current constant for SHA round (reloaded before each group of 20 rounds)
- #define Ra R2 // SHA1 accumulator
- #define Rb R3 // SHA1 accumulator
- #define Rc R4 // SHA1 accumulator
- #define Rd R5 // SHA1 accumulator
- #define Re R6 // SHA1 accumulator
- #define Rt0 R7 // Temporary
- #define Rt1 R8 // Temporary; FUNCn leaves the round function result here for MIX
- // r9, r10 are forbidden
- // r11 is OK provided you check the assembler that no synthetic instructions use it
- #define Rt2 R11 // Temporary
- #define Rctr R12 // loop counter
- #define Rw R14 // point to w buffer; advanced 4 bytes by every LOAD and SHUFFLE
- // func block(dig *digest, p []byte)
- // 0(FP) is *digest
- // 4(FP) is p.array (struct Slice)
- // 8(FP) is p.len
- //12(FP) is p.cap
- //
- // Stack frame
- //
- // Named slots inside the frame declared by the TEXT directive of block().
- // The locals add up to 8 + 4*80 + 4*5 = 348 bytes.
- #define p_end end-4(SP) // pointer to the end of data
- #define p_data data-8(SP) // current data pointer (unused? - not referenced by the code in this file)
- #define w_buf buf-(8+4*80)(SP) //80 words temporary buffer w uint32[80]
- // The saved slot is not referenced by name; block() stores and reloads
- // a,b,c,d,e with MOVM.IB relative to R13 ("SP+4 onwards") instead.
- #define saved abcde-(8+4*80+4*5)(SP) // saved sha1 registers a,b,c,d,e - these must be last (unused?)
- // Total size +4 for saved LR is 352
- // LOAD(Re): read the next message word from the input, store it in the
- // w buffer and add it into the register passed as Re.
- //
- // w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]
- // e += w[i]
- //
- // The word is assembled most-significant-byte-first from four single-byte
- // loads. The load of p[j] is the post-incrementing one, so Rdata ends up
- // 4 bytes further on; the store to the w buffer post-increments Rw by 4.
- // Clobbers Rt0, Rt1 and Rt2. The macro parameter is named Re and therefore
- // stands for whichever register the caller passes, not necessarily R6.
- #define LOAD(Re) \
- MOVBU 2(Rdata), Rt0 ; \
- MOVBU 3(Rdata), Rt1 ; \
- MOVBU 1(Rdata), Rt2 ; \
- ORR Rt0<<8, Rt1, Rt0 ; \
- MOVBU.P 4(Rdata), Rt1 ; \
- ORR Rt2<<16, Rt0, Rt0 ; \
- ORR Rt1<<24, Rt0, Rt0 ; \
- MOVW.P Rt0, 4(Rw) ; \
- ADD Rt0, Re, Re
-
- // SHUFFLE(Re): compute the next message-schedule word from earlier ones,
- // append it to the w buffer and add it into the register passed as Re.
- //
- // Unlike a 16-word circular buffer indexed with &0xf, this code keeps all
- // 80 words in a linear buffer and addresses the earlier words relative to
- // Rw, which points at w[i] on entry:
- //
- // tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]
- // w[i] = tmp<<1 | tmp>>(32-1)
- // e += w[i]
- //
- // The rotate left by 1 is written as a rotate right by 32-1. The store
- // post-increments Rw by 4. Clobbers Rt0, Rt1 and Rt2.
- #define SHUFFLE(Re) \
- MOVW (-16*4)(Rw), Rt0 ; \
- MOVW (-14*4)(Rw), Rt1 ; \
- MOVW (-8*4)(Rw), Rt2 ; \
- EOR Rt0, Rt1, Rt0 ; \
- MOVW (-3*4)(Rw), Rt1 ; \
- EOR Rt2, Rt0, Rt0 ; \
- EOR Rt0, Rt1, Rt0 ; \
- MOVW Rt0@>(32-1), Rt0 ; \
- MOVW.P Rt0, 4(Rw) ; \
- ADD Rt0, Re, Re
- // FUNC1: round function for rounds 0-19 (bitwise select: c where b is
- // set, d where b is clear).
- //
- // t1 = (b & c) | ((~b) & d)
- //
- // The result is left in Rt1 for MIX. Clobbers Rt0 and Rt1; Rb, Rc and Rd
- // are only read. The Ra and Re parameters are accepted for a uniform
- // signature and are not used.
- #define FUNC1(Ra, Rb, Rc, Rd, Re) \
- MVN Rb, Rt1 ; \
- AND Rb, Rc, Rt0 ; \
- AND Rd, Rt1, Rt1 ; \
- ORR Rt0, Rt1, Rt1
- // FUNC2: round function for rounds 20-39 (parity of b, c and d).
- //
- // t1 = b ^ c ^ d
- //
- // The result is left in Rt1 for MIX. Clobbers only Rt1. The Ra and Re
- // parameters are accepted for a uniform signature and are not used.
- #define FUNC2(Ra, Rb, Rc, Rd, Re) \
- EOR Rb, Rc, Rt1 ; \
- EOR Rd, Rt1, Rt1
- // FUNC3: round function for rounds 40-59 (bitwise majority of b, c, d).
- //
- // t1 = (b & c) | (b & d) | (c & d) =
- // t1 = (b & c) | ((b | c) & d)
- //
- // The second form is the one implemented; it needs one fewer operation.
- // The result is left in Rt1 for MIX. Clobbers Rt0 and Rt1. The Ra and Re
- // parameters are accepted for a uniform signature and are not used.
- #define FUNC3(Ra, Rb, Rc, Rd, Re) \
- ORR Rb, Rc, Rt0 ; \
- AND Rb, Rc, Rt1 ; \
- AND Rd, Rt0, Rt0 ; \
- ORR Rt0, Rt1, Rt1
- // FUNC4: round function for rounds 60-79; it is the same b ^ c ^ d
- // parity function as FUNC2, so it is defined as an alias.
- #define FUNC4 FUNC2
- // MIX: fold the round function result (Rt1, produced by FUNCn), the
- // rotated a and the round constant into e, and rotate b in place.
- //
- // a5 := a<<5 | a>>(32-5)
- // b = b<<30 | b>>(32-30)
- // e = a5 + t1 + e + const
- //
- // Both left rotations are written as right rotations by 32-n. Ra is only
- // read (the rotation is applied through the shifted operand); Rb and Re
- // are updated. Rc and Rd are not used. The message word has already been
- // added to Re by LOAD or SHUFFLE before MIX runs.
- #define MIX(Ra, Rb, Rc, Rd, Re) \
- ADD Rt1, Re, Re ; \
- MOVW Rb@>(32-30), Rb ; \
- ADD Ra@>(32-5), Re, Re ; \
- ADD Rconst, Re, Re
- // ROUND1: one type 1 round that loads its message word from the input
- // (rounds 0-15). Expands to LOAD, FUNC1 and MIX, in that order.
- #define ROUND1(Ra, Rb, Rc, Rd, Re) \
- LOAD(Re) ; \
- FUNC1(Ra, Rb, Rc, Rd, Re) ; \
- MIX(Ra, Rb, Rc, Rd, Re)
- // ROUND1x: one type 1 round that derives its message word from the w
- // buffer instead of loading input (rounds 16-19). Expands to SHUFFLE,
- // FUNC1 and MIX, in that order.
- #define ROUND1x(Ra, Rb, Rc, Rd, Re) \
- SHUFFLE(Re) ; \
- FUNC1(Ra, Rb, Rc, Rd, Re) ; \
- MIX(Ra, Rb, Rc, Rd, Re)
- // ROUND2: one type 2 round (rounds 20-39). Expands to SHUFFLE, FUNC2 and
- // MIX, in that order.
- #define ROUND2(Ra, Rb, Rc, Rd, Re) \
- SHUFFLE(Re) ; \
- FUNC2(Ra, Rb, Rc, Rd, Re) ; \
- MIX(Ra, Rb, Rc, Rd, Re)
- // ROUND3: one type 3 round (rounds 40-59). Expands to SHUFFLE, FUNC3 and
- // MIX, in that order.
- #define ROUND3(Ra, Rb, Rc, Rd, Re) \
- SHUFFLE(Re) ; \
- FUNC3(Ra, Rb, Rc, Rd, Re) ; \
- MIX(Ra, Rb, Rc, Rd, Re)
- // ROUND4: one type 4 round (rounds 60-79). Expands to SHUFFLE, FUNC4
- // (an alias of FUNC2) and MIX, in that order.
- #define ROUND4(Ra, Rb, Rc, Rd, Re) \
- SHUFFLE(Re) ; \
- FUNC4(Ra, Rb, Rc, Rd, Re) ; \
- MIX(Ra, Rb, Rc, Rd, Re)
- // func block(dig *digest, p []byte)
- //
- // Hashes every whole 64-byte block of p into the five 32-bit accumulator
- // words stored at the start of *dig. The words are loaded into Ra..Re on
- // entry and written back before returning.
- //
- // Only whole blocks are consumed: the length is rounded down to a multiple
- // of 64, and when no whole block remains the function returns without
- // reading p or touching *dig. Without this check the do-while shaped main
- // loop would hash 64 bytes even for an empty slice, and would read past
- // the end of p for a length that is not a multiple of 64.
- //
- // The register rotation between rounds is done by permuting the macro
- // arguments; after every 5 rounds the registers are back in their original
- // roles, which is why each inner loop body contains exactly 5 rounds.
- TEXT ·block(SB), 0, $352-16
- MOVW p+4(FP), Rdata // pointer to the data
- MOVW p_len+8(FP), Rt0 // number of bytes
- BIC $63, Rt0 // keep whole 64-byte blocks only
- CMP $0, Rt0
- BEQ done // no whole block: nothing to hash
- ADD Rdata, Rt0
- MOVW Rt0, p_end // pointer to end of data
- // Load up initial SHA1 accumulator
- MOVW dig+0(FP), Rt0
- MOVM.IA (Rt0), [Ra,Rb,Rc,Rd,Re]
- loop:
- // Save registers at SP+4 onwards
- MOVM.IB [Ra,Rb,Rc,Rd,Re], (R13)
- MOVW $w_buf, Rw // Rw = &w[0]; LOAD and SHUFFLE advance it one word per round
- // Rounds 0-14: 3 iterations of 5 loading rounds
- MOVW $0x5A827999, Rconst
- MOVW $3, Rctr
- loop1: ROUND1(Ra, Rb, Rc, Rd, Re)
- ROUND1(Re, Ra, Rb, Rc, Rd)
- ROUND1(Rd, Re, Ra, Rb, Rc)
- ROUND1(Rc, Rd, Re, Ra, Rb)
- ROUND1(Rb, Rc, Rd, Re, Ra)
- SUB.S $1, Rctr
- BNE loop1
- // Round 15 (last loading round), then rounds 16-19 from the w buffer
- ROUND1(Ra, Rb, Rc, Rd, Re)
- ROUND1x(Re, Ra, Rb, Rc, Rd)
- ROUND1x(Rd, Re, Ra, Rb, Rc)
- ROUND1x(Rc, Rd, Re, Ra, Rb)
- ROUND1x(Rb, Rc, Rd, Re, Ra)
-
- // Rounds 20-39: 4 iterations of 5 rounds
- MOVW $0x6ED9EBA1, Rconst
- MOVW $4, Rctr
- loop2: ROUND2(Ra, Rb, Rc, Rd, Re)
- ROUND2(Re, Ra, Rb, Rc, Rd)
- ROUND2(Rd, Re, Ra, Rb, Rc)
- ROUND2(Rc, Rd, Re, Ra, Rb)
- ROUND2(Rb, Rc, Rd, Re, Ra)
- SUB.S $1, Rctr
- BNE loop2
-
- // Rounds 40-59: 4 iterations of 5 rounds
- MOVW $0x8F1BBCDC, Rconst
- MOVW $4, Rctr
- loop3: ROUND3(Ra, Rb, Rc, Rd, Re)
- ROUND3(Re, Ra, Rb, Rc, Rd)
- ROUND3(Rd, Re, Ra, Rb, Rc)
- ROUND3(Rc, Rd, Re, Ra, Rb)
- ROUND3(Rb, Rc, Rd, Re, Ra)
- SUB.S $1, Rctr
- BNE loop3
-
- // Rounds 60-79: 4 iterations of 5 rounds
- MOVW $0xCA62C1D6, Rconst
- MOVW $4, Rctr
- loop4: ROUND4(Ra, Rb, Rc, Rd, Re)
- ROUND4(Re, Ra, Rb, Rc, Rd)
- ROUND4(Rd, Re, Ra, Rb, Rc)
- ROUND4(Rc, Rd, Re, Ra, Rb)
- ROUND4(Rb, Rc, Rd, Re, Ra)
- SUB.S $1, Rctr
- BNE loop4
- // Accumulate - restoring registers from SP+4
- // The values saved at the top of the loop are reloaded into scratch
- // registers; Rctr and Rw are free here and are re-initialised at the top
- // of the next iteration.
- MOVM.IB (R13), [Rt0,Rt1,Rt2,Rctr,Rw]
- ADD Rt0, Ra
- ADD Rt1, Rb
- ADD Rt2, Rc
- ADD Rctr, Rd
- ADD Rw, Re
- // Rdata was advanced by 64 bytes by the 16 LOADs; continue while it is
- // still below the end pointer (unsigned compare).
- MOVW p_end, Rt0
- CMP Rt0, Rdata
- BLO loop
- // Save final SHA1 accumulator
- MOVW dig+0(FP), Rt0
- MOVM.IA [Ra,Rb,Rc,Rd,Re], (Rt0)
- done:
- RET
|