123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210 |
- // Package encapsulation implements a way of encoding variable-size chunks of
- // data and padding into a byte stream.
- //
- // Each chunk of data or padding starts with a variable-size length prefix. One
- // bit ("d") in the first byte of the prefix indicates whether the chunk
- // represents data or padding (1=data, 0=padding). Another bit ("c" for
- // "continuation") is the indicates whether there are more bytes in the length
- // prefix. The remaining 6 bits ("x") encode part of the length value.
- //
- // dcxxxxxx
- //
- // If the continuation bit is set, then the next byte is also part of the length
- // prefix. It lacks the "d" bit, has its own "c" bit, and 7 value-carrying bits
- // ("y").
- //
- // cyyyyyyy
- //
- // The length is decoded by concatenating value-carrying bits, from left to
- // right, of all value-carrying bits, up to and including the first byte whose
- // "c" bit is 0. Although in principle this encoding would allow for length
- // prefixes of any size, length prefixes are arbitrarily limited to 3 bytes and
- // any attempt to read or write a longer one is an error. These are therefore
- // the only valid formats:
- //
- // 00xxxxxx xxxxxx₂ bytes of padding
- // 10xxxxxx xxxxxx₂ bytes of data
- // 01xxxxxx 0yyyyyyy xxxxxxyyyyyyy₂ bytes of padding
- // 11xxxxxx 0yyyyyyy xxxxxxyyyyyyy₂ bytes of data
- // 01xxxxxx 1yyyyyyy 0zzzzzzz xxxxxxyyyyyyyzzzzzzz₂ bytes of padding
- // 11xxxxxx 1yyyyyyy 0zzzzzzz xxxxxxyyyyyyyzzzzzzz₂ bytes of data
- //
- // The maximum encodable length is 11111111111111111111₂ = 0xfffff = 1048575.
- // There is no requirement to use a length prefix of minimum size; i.e. 00000100
- // and 01000000 00000100 are both valid encodings of the value 4.
- //
- // After the length prefix follow that many bytes of padding or data. There are
- // no restrictions on the value of bytes comprising padding.
- //
- // The idea for this encapsulation is sketched here:
- // https://github.com/net4people/bbs/issues/9#issuecomment-524095186
- package encapsulation
- import (
- "errors"
- "io"
- )
- // ErrTooLong is the error returned when an encoded length prefix is longer than
- // 3 bytes, or when ReadData receives an input whose length is too large to
- // encode in a 3-byte length prefix.
- var ErrTooLong = errors.New("length prefix is too long")
- // ReadData the next available data chunk, skipping over any padding chunks that
- // may come first, and copies the data into p. If p is shorter than the length
- // of the data chunk, only the first len(p) bytes are copied into p, and the
- // error return is io.ErrShortBuffer. The returned error value is nil if and
- // only if a data chunk was present and was read in its entirety. The returned
- // error is io.EOF only if r ended before the first byte of a length prefix. If
- // r ended in the middle of a length prefix or data/padding, the returned error
- // is io.ErrUnexpectedEOF.
- func ReadData(r io.Reader, p []byte) (int, error) {
- for {
- var b [1]byte
- _, err := r.Read(b[:])
- if err != nil {
- // This is the only place we may return a real io.EOF.
- return 0, err
- }
- isData := (b[0] & 0x80) != 0
- moreLength := (b[0] & 0x40) != 0
- n := int(b[0] & 0x3f)
- for i := 0; moreLength; i++ {
- if i >= 2 {
- return 0, ErrTooLong
- }
- _, err := r.Read(b[:])
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- if err != nil {
- return 0, err
- }
- moreLength = (b[0] & 0x80) != 0
- n = (n << 7) | int(b[0]&0x7f)
- }
- if isData {
- if len(p) > n {
- p = p[:n]
- }
- numData, err := io.ReadFull(r, p)
- if err == nil && numData < n {
- // If the caller's buffer was too short, discard
- // the rest of the data and return
- // io.ErrShortBuffer.
- _, err = io.CopyN(io.Discard, r, int64(n-numData))
- if err == nil {
- err = io.ErrShortBuffer
- }
- }
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- return numData, err
- } else if n > 0 {
- _, err := io.CopyN(io.Discard, r, int64(n))
- if err == io.EOF {
- err = io.ErrUnexpectedEOF
- }
- if err != nil {
- return 0, err
- }
- }
- }
- }
- // dataPrefixForLength returns a length prefix for the given length, with the
- // "d" bit set to 1.
- func dataPrefixForLength(n int) ([]byte, error) {
- switch {
- case (n>>0)&0x3f == (n >> 0):
- return []byte{0x80 | byte((n>>0)&0x3f)}, nil
- case (n>>7)&0x3f == (n >> 7):
- return []byte{0xc0 | byte((n>>7)&0x3f), byte((n >> 0) & 0x7f)}, nil
- case (n>>14)&0x3f == (n >> 14):
- return []byte{0xc0 | byte((n>>14)&0x3f), 0x80 | byte((n>>7)&0x7f), byte((n >> 0) & 0x7f)}, nil
- default:
- return nil, ErrTooLong
- }
- }
- // WriteData encodes a data chunk into w. It returns the total number of bytes
- // written; i.e., including the length prefix. The error is ErrTooLong if the
- // length of data cannot fit into a length prefix.
- func WriteData(w io.Writer, data []byte) (int, error) {
- prefix, err := dataPrefixForLength(len(data))
- if err != nil {
- return 0, err
- }
- total := 0
- n, err := w.Write(prefix)
- total += n
- if err != nil {
- return total, err
- }
- n, err = w.Write(data)
- total += n
- return total, err
- }
- var paddingBuffer [1024]byte
- // WritePadding encodes padding chunks, whose total size (including their own
- // length prefixes) is n. Returns the total number of bytes written to w, which
- // will be exactly n unless there was an error. The error cannot be ErrTooLong
- // because this function will write multiple padding chunks if necessary to
- // reach the requested size. Panics if n is negative.
- func WritePadding(w io.Writer, n int) (int, error) {
- if n < 0 {
- panic("negative length")
- }
- total := 0
- for n > 0 {
- p := len(paddingBuffer)
- if p > n {
- p = n
- }
- n -= p
- var prefix []byte
- switch {
- case ((p-1)>>0)&0x3f == ((p - 1) >> 0):
- p = p - 1
- prefix = []byte{byte((p >> 0) & 0x3f)}
- case ((p-2)>>7)&0x3f == ((p - 2) >> 7):
- p = p - 2
- prefix = []byte{0x40 | byte((p>>7)&0x3f), byte((p >> 0) & 0x7f)}
- case ((p-3)>>14)&0x3f == ((p - 3) >> 14):
- p = p - 3
- prefix = []byte{0x40 | byte((p>>14)&0x3f), 0x80 | byte((p>>7)&0x3f), byte((p >> 0) & 0x7f)}
- }
- nn, err := w.Write(prefix)
- total += nn
- if err != nil {
- return total, err
- }
- nn, err = w.Write(paddingBuffer[:p])
- total += nn
- if err != nil {
- return total, err
- }
- }
- return total, nil
- }
- // MaxDataForSize returns the length of the longest slice that can pe passed to
- // WriteData, whose total encoded size (including length prefix) is no larger
- // than n. Call this to find out if a chunk of data will fit into a length
- // budget. Panics if n == 0.
- func MaxDataForSize(n int) int {
- if n == 0 {
- panic("zero length")
- }
- prefix, err := dataPrefixForLength(n)
- if err == ErrTooLong {
- return (1 << (6 + 7 + 7)) - 1 - 3
- } else if err != nil {
- panic(err)
- }
- return n - len(prefix)
- }
|