// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package bmt provides a binary merkle tree implementation.
package bmt

import (
    "fmt"
    "hash"
    "io"
    "strings"
    "sync"
    "sync/atomic"
)

/*
Binary Merkle Tree Hash is a hash function over arbitrary data chunks of limited size.
It is defined as the root hash of the binary merkle tree built over fixed size segments
of the underlying chunk using any base hash function (e.g. keccak256 SHA3).
It is used as the chunk hash function in swarm, which in turn is the basis for the
128-branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash

The BMT is optimal for providing compact inclusion proofs, i.e. proving that a
segment is a substring of a chunk starting at a particular offset.
The size of the underlying segments is fixed at 32 bytes (called the resolution
of the BMT hash): this is both the EVM word size, optimising for on-chain BMT
verification, and the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.

Two implementations are provided:

* RefHasher is optimised for code simplicity and meant as a reference implementation
* Hasher is optimised for speed, taking advantage of concurrency with a minimalistic
  control structure to coordinate the concurrent routines.
  It implements the ChunkHash interface as well as the Go standard hash.Hash interface.
*/
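
// A minimal usage sketch. The base hash constructor is an assumption here:
// any func() hash.Hash works, e.g. sha256.New from crypto/sha256 (swarm
// itself uses a keccak256 constructor):
//
//	pool := NewTreePool(baseHasher, DefaultSegmentCount, DefaultPoolSize)
//	bmt := New(pool)
//	bmt.Reset()          // reserve a clean Tree from the pool
//	bmt.Write(chunk)     // chunk is at most SegmentSize*SegmentCount bytes
//	root := bmt.Sum(nil) // root hash; the Tree is released back to the pool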

const (
    // DefaultSegmentCount is the maximum number of segments of the underlying chunk
    DefaultSegmentCount = 128 // Should be equal to storage.DefaultBranches
    // DefaultPoolSize is the maximum number of bmt trees used by the hashers, i.e.
    // the maximum number of concurrent BMT hashing operations performed by the same hasher
    DefaultPoolSize = 8
)

// BaseHasher is a hash.Hash constructor function used for the base hash of the BMT.
type BaseHasher func() hash.Hash

// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT.
// It implements the hash.Hash interface.
// It reuses a pool of Trees for amortised memory allocation and resource control,
// and supports order-agnostic concurrent segment writes
// as well as sequential read and write.
// It cannot be called concurrently on more than one chunk.
// It can be further appended to after Sum.
// Reset gives back the Tree to the pool and is guaranteed to leave
// the tree and itself in a state reusable for hashing a new chunk.
type Hasher struct {
    pool        *TreePool   // BMT resource pool
    bmt         *Tree       // prebuilt BMT resource for flowcontrol and proofs
    blocksize   int         // segment size (size of hash) also for hash.Hash
    count       int         // segment count
    size        int         // for hash.Hash same as hashsize
    cur         int         // cursor position for rightmost currently open chunk
    segment     []byte      // the rightmost open segment (not complete)
    depth       int         // index of last level
    result      chan []byte // result channel
    hash        []byte      // to record the result
    max         int32       // max segments for SegmentWriter interface
    blockLength []byte      // the block length that needs to be added in Sum
}

// New creates a reusable Hasher.
// It implements the hash.Hash interface and
// pulls a new Tree from a resource pool for hashing each chunk.
func New(p *TreePool) *Hasher {
    return &Hasher{
        pool:      p,
        depth:     depth(p.SegmentCount),
        size:      p.SegmentSize,
        blocksize: p.SegmentSize,
        count:     p.SegmentCount,
        result:    make(chan []byte),
    }
}
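
// hashOneChunk is a usage sketch (a hypothetical helper, not part of the
// original API): it hashes a single chunk of at most
// SegmentSize*SegmentCount bytes with a pooled Hasher.
func hashOneChunk(pool *TreePool, data []byte) []byte {
    h := New(pool)
    h.Reset()         // reserves a clean Tree from the pool
    h.Write(data)     // segments are hashed concurrently as they complete
    return h.Sum(nil) // blocks until the root hash arrives; releases the Tree
}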

// Node is a reusable segment hasher representing a node in a BMT.
// It allows for continued writes after a Sum
// and is left in a completely reusable state after Reset.
type Node struct {
    level, index int    // position of node for information/logging only
    initial      bool   // whether the node is the first (index 0) on its level
    root         bool   // whether the node is root to a smaller BMT
    isLeft       bool   // whether it is the left side of the parent double segment
    unbalanced   bool   // indicates if a node has only the left segment
    parent       *Node  // BMT connections
    state        int32  // atomic increment implements a concurrent boolean toggle
    left, right  []byte
}

// NewNode constructs a segment hasher node in the BMT.
func NewNode(level, index int, parent *Node) *Node {
    return &Node{
        parent:  parent,
        level:   level,
        index:   index,
        initial: index == 0,
        isLeft:  index%2 == 0,
    }
}

// TreePool provides a pool of Trees used as resources by Hasher.
// A Tree popped from the pool is guaranteed to have a clean state
// for hashing a new chunk.
// Hasher Reset releases the Tree to the pool.
type TreePool struct {
    lock         sync.Mutex
    c            chan *Tree
    hasher       BaseHasher
    SegmentSize  int
    SegmentCount int
    Capacity     int
    count        int
}

// NewTreePool creates a Tree pool with hasher, segment size, segment count and capacity.
// On Reserve it reuses free Trees or creates a new one if capacity is not yet reached.
func NewTreePool(hasher BaseHasher, segmentCount, capacity int) *TreePool {
    return &TreePool{
        c:            make(chan *Tree, capacity),
        hasher:       hasher,
        SegmentSize:  hasher().Size(),
        SegmentCount: segmentCount,
        Capacity:     capacity,
    }
}
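
// newDefaultPool is an illustrative sketch (hypothetical helper): a pool
// sized with the package defaults; base can be any hash constructor,
// e.g. sha256.New.
func newDefaultPool(base BaseHasher) *TreePool {
    return NewTreePool(base, DefaultSegmentCount, DefaultPoolSize)
}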

// Drain drains the pool until it has no more than n resources.
func (p *TreePool) Drain(n int) {
    p.lock.Lock()
    defer p.lock.Unlock()
    for len(p.c) > n {
        <-p.c
        p.count--
    }
}

// Reserve blocks until it returns an available Tree.
// It reuses free Trees or creates a new one if capacity is not yet reached.
func (p *TreePool) Reserve() *Tree {
    p.lock.Lock()
    defer p.lock.Unlock()
    var t *Tree
    if p.count == p.Capacity {
        return <-p.c
    }
    select {
    case t = <-p.c:
    default:
        t = NewTree(p.hasher, p.SegmentSize, p.SegmentCount)
        p.count++
    }
    return t
}

// Release gives back a Tree to the pool.
// The Tree is guaranteed to be in a reusable state
// and does not need locking.
func (p *TreePool) Release(t *Tree) {
    p.c <- t // can never fail but...
}
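
// withTree is a sketch of manual pool handling (hypothetical helper; Hasher
// does the equivalent internally via getTree and releaseTree): Reserve blocks
// once Capacity Trees are checked out, so every Reserve must be paired with a
// Release.
func withTree(p *TreePool, f func(t *Tree)) {
    t := p.Reserve()
    defer p.Release(t)
    f(t)
}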

// Tree is a reusable control structure representing a BMT
// organised in a binary tree.
// Hasher uses a TreePool to pick one for each chunk hash;
// the Tree is 'locked' while not in the pool.
type Tree struct {
    leaves []*Node
}

// Draw draws the BMT (badly)
func (t *Tree) Draw(hash []byte, d int) string {
    var left, right []string
    var anc []*Node
    for i, n := range t.leaves {
        left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
        if i%2 == 0 {
            anc = append(anc, n.parent)
        }
        right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
    }
    anc = t.leaves
    var hashes [][]string
    for l := 0; len(anc) > 0; l++ {
        var nodes []*Node
        hash := []string{""}
        for i, n := range anc {
            hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
            if i%2 == 0 && n.parent != nil {
                nodes = append(nodes, n.parent)
            }
        }
        hash = append(hash, "")
        hashes = append(hashes, hash)
        anc = nodes
    }
    hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
    total := 60
    del := " "
    var rows []string
    for i := len(hashes) - 1; i >= 0; i-- {
        var textlen int
        hash := hashes[i]
        for _, s := range hash {
            textlen += len(s)
        }
        if total < textlen {
            total = textlen + len(hash)
        }
        delsize := (total - textlen) / (len(hash) - 1)
        if delsize > len(del) {
            delsize = len(del)
        }
        row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
        rows = append(rows, row)
    }
    rows = append(rows, strings.Join(left, " "))
    rows = append(rows, strings.Join(right, " "))
    return strings.Join(rows, "\n") + "\n"
}

// NewTree initialises the Tree by building up the nodes of a BMT.
// Segment size is stipulated to be the size of the hash.
// segmentCount needs to be a positive integer; it does not need to be
// a power of two and can even be an odd number.
// segmentSize * segmentCount determines the maximum chunk size
// hashed using the tree.
func NewTree(hasher BaseHasher, segmentSize, segmentCount int) *Tree {
    n := NewNode(0, 0, nil)
    n.root = true
    prevlevel := []*Node{n}
    // iterate over levels, creating 2^level nodes on each
    level := 1
    count := 2
    for d := 1; d <= depth(segmentCount); d++ {
        nodes := make([]*Node, count)
        for i := 0; i < len(nodes); i++ {
            parent := prevlevel[i/2]
            t := NewNode(level, i, parent)
            nodes[i] = t
        }
        prevlevel = nodes
        level++
        count *= 2
    }
    // the datanode level is the nodes on the last level; they become the leaves
    return &Tree{
        leaves: prevlevel,
    }
}
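
// newChunkTree is an illustrative sketch (hypothetical helper): a standalone
// Tree sized for one default chunk. In normal use, Trees are only obtained
// through a TreePool; the segment size must match the base hash's Size(),
// which NewTreePool arranges automatically.
func newChunkTree(base BaseHasher) *Tree {
    return NewTree(base, base().Size(), DefaultSegmentCount)
}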

// methods needed by hash.Hash

// Size returns the size of the hash.
func (h *Hasher) Size() int {
    return h.size
}

// BlockSize returns the block size, i.e. the segment size.
func (h *Hasher) BlockSize() int {
    return h.blocksize
}

// Sum returns the hash of the buffer.
// The hash.Hash interface Sum method appends the byte slice to the underlying
// data before it calculates and returns the hash of the chunk.
func (h *Hasher) Sum(b []byte) (r []byte) {
    t := h.bmt
    i := h.cur
    n := t.leaves[i]
    j := i
    // must run strictly before all nodes calculate;
    // datanodes are guaranteed to have a parent
    if len(h.segment) > h.size && i > 0 && n.parent != nil {
        n = n.parent
    } else {
        i *= 2
    }
    d := h.finalise(n, i)
    h.writeSegment(j, h.segment, d)
    c := <-h.result
    h.releaseTree()
    // sha3(length + BMT(pure_chunk))
    if h.blockLength == nil {
        return c
    }
    res := h.pool.hasher()
    res.Reset()
    res.Write(h.blockLength)
    res.Write(c)
    return res.Sum(nil)
}

// Hasher implements the SwarmHash interface.
// Hash waits for the hasher result and returns it;
// the caller must call this on a BMT Hasher being written to.
func (h *Hasher) Hash() []byte {
    return <-h.result
}

// Hasher implements the io.Writer interface.
// Write fills the buffer to hash;
// every time a segment is completed it launches a hasher goroutine
// that propagates the hash up the BMT.
func (h *Hasher) Write(b []byte) (int, error) {
    l := len(b)
    if l <= 0 {
        return 0, nil
    }
    s := h.segment
    i := h.cur
    count := (h.count + 1) / 2
    need := h.count*h.size - h.cur*2*h.size
    size := h.size
    if need > size {
        size *= 2
    }
    if l < need {
        need = l
    }
    // calculate the missing bit to complete the current open segment
    rest := size - len(s)
    if need < rest {
        rest = need
    }
    s = append(s, b[:rest]...)
    need -= rest
    // read full segments and the last possibly partial segment
    for need > 0 && i < count-1 {
        // push all finished chunks we read
        h.writeSegment(i, s, h.depth)
        need -= size
        if need < 0 {
            size += need
        }
        s = b[rest : rest+size]
        rest += size
        i++
    }
    h.segment = s
    h.cur = i
    // otherwise, we can assume len(s) == 0, so the whole buffer is read and the chunk is not yet full
    return l, nil
}
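
// writeInPieces is an illustrative sketch (hypothetical helper, assumes
// pieceSize > 0): a chunk can be streamed into the hasher across several
// Write calls with arbitrary boundaries; the result should match a single
// Write of the whole chunk.
func writeInPieces(pool *TreePool, data []byte, pieceSize int) []byte {
    h := New(pool)
    h.Reset()
    for len(data) > 0 {
        n := pieceSize
        if n > len(data) {
            n = len(data)
        }
        h.Write(data[:n]) // each completed segment launches a hasher goroutine
        data = data[n:]
    }
    return h.Sum(nil)
}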

// Hasher implements the io.ReaderFrom interface.
// ReadFrom reads from io.Reader and appends it to the data to hash using Write;
// it reads until the chunk to hash reaches maximum length or the reader reaches EOF.
// The caller must Reset the hasher prior to the call.
func (h *Hasher) ReadFrom(r io.Reader) (m int64, err error) {
    bufsize := h.size*h.count - h.size*h.cur - len(h.segment)
    buf := make([]byte, bufsize)
    var read int
    for {
        var n int
        n, err = r.Read(buf)
        read += n
        if err == io.EOF || read == len(buf) {
            hash := h.Sum(buf[:n])
            if read == len(buf) {
                err = NewEOC(hash)
            }
            break
        }
        if err != nil {
            break
        }
        n, err = h.Write(buf[:n])
        if err != nil {
            break
        }
    }
    return int64(read), err
}
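
// readChunk is an illustrative sketch (hypothetical helper) driving one
// ReadFrom cycle: a chunk filled to capacity is signalled with an *EOC error
// carrying the chunk's BMT hash, while a draining reader surfaces as io.EOF.
func readChunk(pool *TreePool, r io.Reader) ([]byte, error) {
    h := New(pool)
    h.Reset()
    _, err := h.ReadFrom(r)
    if eoc, ok := err.(*EOC); ok {
        return eoc.Hash, nil // full chunk: hash delivered via the EOC error
    }
    return nil, err
}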

// Reset needs to be called before writing to the hasher.
func (h *Hasher) Reset() {
    h.getTree()
    h.blockLength = nil
}

// Hasher implements the SwarmHash interface.
// ResetWithLength needs to be called before writing to the hasher;
// the argument is supposed to be the byte slice binary representation of
// the length of the data subsumed under the hash.
func (h *Hasher) ResetWithLength(l []byte) {
    h.Reset()
    h.blockLength = l
}
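
// hashWithSpan is a sketch of the SwarmHash-style flow: the final digest is
// hash(length || BMT(chunk)). The 8-byte little-endian span encoding below is
// an assumption for illustration; this file only requires some byte slice.
func hashWithSpan(pool *TreePool, span uint64, data []byte) []byte {
    h := New(pool)
    l := make([]byte, 8)
    for i := range l {
        l[i] = byte(span >> uint(8*i)) // little-endian (assumed convention)
    }
    h.ResetWithLength(l)
    h.Write(data)
    return h.Sum(nil)
}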

// releaseTree gives back the Tree to the pool whereby it unlocks;
// it resets the tree, segment and cursor.
func (h *Hasher) releaseTree() {
    if h.bmt != nil {
        n := h.bmt.leaves[h.cur]
        for ; n != nil; n = n.parent {
            n.unbalanced = false
            if n.parent != nil {
                n.root = false
            }
        }
        h.pool.Release(h.bmt)
        h.bmt = nil
    }
    h.cur = 0
    h.segment = nil
}

// writeSegment writes the ith segment into the BMT:
// a complete double segment is hashed in its own goroutine and the digest is
// relayed to the parent node; otherwise the raw segment enters at the leaf.
func (h *Hasher) writeSegment(i int, s []byte, d int) {
    hash := h.pool.hasher()
    n := h.bmt.leaves[i]
    if len(s) > h.size && n.parent != nil {
        go func() {
            hash.Reset()
            hash.Write(s)
            s = hash.Sum(nil)
            if n.root {
                h.result <- s
                return
            }
            h.run(n.parent, hash, d, n.index, s)
        }()
        return
    }
    go h.run(n, hash, d, i*2, s)
}

// run relays the digest s up the tree from node n: at each node the first of
// the two sibling writers parks its data and returns, while the second
// combines both sides and continues upwards.
func (h *Hasher) run(n *Node, hash hash.Hash, d int, i int, s []byte) {
    isLeft := i%2 == 0
    for {
        if isLeft {
            n.left = s
        } else {
            n.right = s
        }
        // the first sibling to arrive toggles the state and stops; the
        // second finds the node complete and carries the hash upwards
        if !n.unbalanced && n.toggle() {
            return
        }
        if !n.unbalanced || !isLeft || i == 0 && d == 0 {
            hash.Reset()
            hash.Write(n.left)
            hash.Write(n.right)
            s = hash.Sum(nil)
        } else {
            // unbalanced node on the final segment's path: no right
            // sibling will arrive, so the left hash is passed through
            s = append(n.left, n.right...)
        }
        h.hash = s
        if n.root {
            h.result <- s
            return
        }
        isLeft = n.isLeft
        n = n.parent
        i++
    }
}

// getTree obtains a BMT resource by reserving one from the pool.
func (h *Hasher) getTree() *Tree {
    if h.bmt != nil {
        return h.bmt
    }
    t := h.pool.Reserve()
    h.bmt = t
    return t
}

// toggle implements a concurrent reusable 2-state object:
// an atomic AddInt32 with %2 implements an atomic bool toggle.
// It returns true if the toggler just put the node in the active/waiting
// state, i.e. the caller was the first of the two sibling writers to arrive.
func (n *Node) toggle() bool {
    return atomic.AddInt32(&n.state, 1)%2 == 1
}

// hashstr renders (at most) the first 4 bytes of a hash for Draw.
func hashstr(b []byte) string {
    end := len(b)
    if end > 4 {
        end = 4
    }
    return fmt.Sprintf("%x", b[:end])
}

// depth returns the index of the last level of a BMT over n segments.
func depth(n int) (d int) {
    for l := (n - 1) / 2; l > 0; l /= 2 {
        d++
    }
    return d
}
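
// For example, depth(2) == 0 (the root alone covers a double segment) and
// depth(128) == 6, since 128 segments fill 64 == 2^6 leaf nodes, each
// covering a pair of segments.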

// finalise follows the zigzag path along the nodes belonging
// to the final data segment.
func (h *Hasher) finalise(n *Node, i int) (d int) {
    isLeft := i%2 == 0
    for {
        // when the final segment's path goes via left segments,
        // the incoming data is pushed to the parent upon pulling the left;
        // we do not need to toggle the state since this condition is
        // detectable
        n.unbalanced = isLeft
        n.right = nil
        if n.initial {
            n.root = true
            return d
        }
        isLeft = n.isLeft
        n = n.parent
        d++
    }
}

// EOC (end of chunk) implements the error interface.
type EOC struct {
    Hash []byte // read the hash of the chunk off the error
}

// Error returns the error string.
func (e *EOC) Error() string {
    return fmt.Sprintf("hasher limit reached, chunk hash: %x", e.Hash)
}

// NewEOC creates a new end-of-chunk error with the hash.
func NewEOC(hash []byte) *EOC {
    return &EOC{hash}
}