encapsulation.go 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // Package encapsulation implements a way of encoding variable-size chunks of
  2. // data and padding into a byte stream.
  3. //
  4. // Each chunk of data or padding starts with a variable-size length prefix. One
  5. // bit ("d") in the first byte of the prefix indicates whether the chunk
  6. // represents data or padding (1=data, 0=padding). Another bit ("c" for
  7. // "continuation") is the indicates whether there are more bytes in the length
  8. // prefix. The remaining 6 bits ("x") encode part of the length value.
  9. //
  10. // dcxxxxxx
  11. //
  12. // If the continuation bit is set, then the next byte is also part of the length
  13. // prefix. It lacks the "d" bit, has its own "c" bit, and 7 value-carrying bits
  14. // ("y").
  15. //
  16. // cyyyyyyy
  17. //
  18. // The length is decoded by concatenating value-carrying bits, from left to
  19. // right, of all value-carrying bits, up to and including the first byte whose
  20. // "c" bit is 0. Although in principle this encoding would allow for length
  21. // prefixes of any size, length prefixes are arbitrarily limited to 3 bytes and
  22. // any attempt to read or write a longer one is an error. These are therefore
  23. // the only valid formats:
  24. //
  25. // 00xxxxxx xxxxxx₂ bytes of padding
  26. // 10xxxxxx xxxxxx₂ bytes of data
  27. // 01xxxxxx 0yyyyyyy xxxxxxyyyyyyy₂ bytes of padding
  28. // 11xxxxxx 0yyyyyyy xxxxxxyyyyyyy₂ bytes of data
  29. // 01xxxxxx 1yyyyyyy 0zzzzzzz xxxxxxyyyyyyyzzzzzzz₂ bytes of padding
  30. // 11xxxxxx 1yyyyyyy 0zzzzzzz xxxxxxyyyyyyyzzzzzzz₂ bytes of data
  31. //
  32. // The maximum encodable length is 11111111111111111111₂ = 0xfffff = 1048575.
  33. // There is no requirement to use a length prefix of minimum size; i.e. 00000100
  34. // and 01000000 00000100 are both valid encodings of the value 4.
  35. //
  36. // After the length prefix follow that many bytes of padding or data. There are
  37. // no restrictions on the value of bytes comprising padding.
  38. //
  39. // The idea for this encapsulation is sketched here:
  40. // https://github.com/net4people/bbs/issues/9#issuecomment-524095186
  41. package encapsulation
  42. import (
  43. "errors"
  44. "io"
  45. )
  46. // ErrTooLong is the error returned when an encoded length prefix is longer than
  47. // 3 bytes, or when ReadData receives an input whose length is too large to
  48. // encode in a 3-byte length prefix.
  49. var ErrTooLong = errors.New("length prefix is too long")
  50. // ReadData the next available data chunk, skipping over any padding chunks that
  51. // may come first, and copies the data into p. If p is shorter than the length
  52. // of the data chunk, only the first len(p) bytes are copied into p, and the
  53. // error return is io.ErrShortBuffer. The returned error value is nil if and
  54. // only if a data chunk was present and was read in its entirety. The returned
  55. // error is io.EOF only if r ended before the first byte of a length prefix. If
  56. // r ended in the middle of a length prefix or data/padding, the returned error
  57. // is io.ErrUnexpectedEOF.
  58. func ReadData(r io.Reader, p []byte) (int, error) {
  59. for {
  60. var b [1]byte
  61. _, err := r.Read(b[:])
  62. if err != nil {
  63. // This is the only place we may return a real io.EOF.
  64. return 0, err
  65. }
  66. isData := (b[0] & 0x80) != 0
  67. moreLength := (b[0] & 0x40) != 0
  68. n := int(b[0] & 0x3f)
  69. for i := 0; moreLength; i++ {
  70. if i >= 2 {
  71. return 0, ErrTooLong
  72. }
  73. _, err := r.Read(b[:])
  74. if err == io.EOF {
  75. err = io.ErrUnexpectedEOF
  76. }
  77. if err != nil {
  78. return 0, err
  79. }
  80. moreLength = (b[0] & 0x80) != 0
  81. n = (n << 7) | int(b[0]&0x7f)
  82. }
  83. if isData {
  84. if len(p) > n {
  85. p = p[:n]
  86. }
  87. numData, err := io.ReadFull(r, p)
  88. if err == nil && numData < n {
  89. // If the caller's buffer was too short, discard
  90. // the rest of the data and return
  91. // io.ErrShortBuffer.
  92. _, err = io.CopyN(io.Discard, r, int64(n-numData))
  93. if err == nil {
  94. err = io.ErrShortBuffer
  95. }
  96. }
  97. if err == io.EOF {
  98. err = io.ErrUnexpectedEOF
  99. }
  100. return numData, err
  101. } else if n > 0 {
  102. _, err := io.CopyN(io.Discard, r, int64(n))
  103. if err == io.EOF {
  104. err = io.ErrUnexpectedEOF
  105. }
  106. if err != nil {
  107. return 0, err
  108. }
  109. }
  110. }
  111. }
  112. // dataPrefixForLength returns a length prefix for the given length, with the
  113. // "d" bit set to 1.
  114. func dataPrefixForLength(n int) ([]byte, error) {
  115. switch {
  116. case (n>>0)&0x3f == (n >> 0):
  117. return []byte{0x80 | byte((n>>0)&0x3f)}, nil
  118. case (n>>7)&0x3f == (n >> 7):
  119. return []byte{0xc0 | byte((n>>7)&0x3f), byte((n >> 0) & 0x7f)}, nil
  120. case (n>>14)&0x3f == (n >> 14):
  121. return []byte{0xc0 | byte((n>>14)&0x3f), 0x80 | byte((n>>7)&0x7f), byte((n >> 0) & 0x7f)}, nil
  122. default:
  123. return nil, ErrTooLong
  124. }
  125. }
  126. // WriteData encodes a data chunk into w. It returns the total number of bytes
  127. // written; i.e., including the length prefix. The error is ErrTooLong if the
  128. // length of data cannot fit into a length prefix.
  129. func WriteData(w io.Writer, data []byte) (int, error) {
  130. prefix, err := dataPrefixForLength(len(data))
  131. if err != nil {
  132. return 0, err
  133. }
  134. total := 0
  135. n, err := w.Write(prefix)
  136. total += n
  137. if err != nil {
  138. return total, err
  139. }
  140. n, err = w.Write(data)
  141. total += n
  142. return total, err
  143. }
  144. var paddingBuffer [1024]byte
  145. // WritePadding encodes padding chunks, whose total size (including their own
  146. // length prefixes) is n. Returns the total number of bytes written to w, which
  147. // will be exactly n unless there was an error. The error cannot be ErrTooLong
  148. // because this function will write multiple padding chunks if necessary to
  149. // reach the requested size. Panics if n is negative.
  150. func WritePadding(w io.Writer, n int) (int, error) {
  151. if n < 0 {
  152. panic("negative length")
  153. }
  154. total := 0
  155. for n > 0 {
  156. p := len(paddingBuffer)
  157. if p > n {
  158. p = n
  159. }
  160. n -= p
  161. var prefix []byte
  162. switch {
  163. case ((p-1)>>0)&0x3f == ((p - 1) >> 0):
  164. p = p - 1
  165. prefix = []byte{byte((p >> 0) & 0x3f)}
  166. case ((p-2)>>7)&0x3f == ((p - 2) >> 7):
  167. p = p - 2
  168. prefix = []byte{0x40 | byte((p>>7)&0x3f), byte((p >> 0) & 0x7f)}
  169. case ((p-3)>>14)&0x3f == ((p - 3) >> 14):
  170. p = p - 3
  171. prefix = []byte{0x40 | byte((p>>14)&0x3f), 0x80 | byte((p>>7)&0x3f), byte((p >> 0) & 0x7f)}
  172. }
  173. nn, err := w.Write(prefix)
  174. total += nn
  175. if err != nil {
  176. return total, err
  177. }
  178. nn, err = w.Write(paddingBuffer[:p])
  179. total += nn
  180. if err != nil {
  181. return total, err
  182. }
  183. }
  184. return total, nil
  185. }
  186. // MaxDataForSize returns the length of the longest slice that can pe passed to
  187. // WriteData, whose total encoded size (including length prefix) is no larger
  188. // than n. Call this to find out if a chunk of data will fit into a length
  189. // budget. Panics if n == 0.
  190. func MaxDataForSize(n int) int {
  191. if n == 0 {
  192. panic("zero length")
  193. }
  194. prefix, err := dataPrefixForLength(n)
  195. if err == ErrTooLong {
  196. return (1 << (6 + 7 + 7)) - 1 - 3
  197. } else if err != nil {
  198. panic(err)
  199. }
  200. return n - len(prefix)
  201. }