encodedword.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280
  1. package quotedprintable
  2. import (
  3. "bytes"
  4. "encoding/base64"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "strings"
  9. "unicode"
  10. "unicode/utf8"
  11. )
  12. // A WordEncoder is a RFC 2047 encoded-word encoder.
  13. type WordEncoder byte
  14. const (
  15. // BEncoding represents Base64 encoding scheme as defined by RFC 2045.
  16. BEncoding = WordEncoder('b')
  17. // QEncoding represents the Q-encoding scheme as defined by RFC 2047.
  18. QEncoding = WordEncoder('q')
  19. )
  20. var (
  21. errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
  22. )
  23. // Encode returns the encoded-word form of s. If s is ASCII without special
  24. // characters, it is returned unchanged. The provided charset is the IANA
  25. // charset name of s. It is case insensitive.
  26. func (e WordEncoder) Encode(charset, s string) string {
  27. if !needsEncoding(s) {
  28. return s
  29. }
  30. return e.encodeWord(charset, s)
  31. }
  32. func needsEncoding(s string) bool {
  33. for _, b := range s {
  34. if (b < ' ' || b > '~') && b != '\t' {
  35. return true
  36. }
  37. }
  38. return false
  39. }
  40. // encodeWord encodes a string into an encoded-word.
  41. func (e WordEncoder) encodeWord(charset, s string) string {
  42. buf := getBuffer()
  43. defer putBuffer(buf)
  44. buf.WriteString("=?")
  45. buf.WriteString(charset)
  46. buf.WriteByte('?')
  47. buf.WriteByte(byte(e))
  48. buf.WriteByte('?')
  49. if e == BEncoding {
  50. w := base64.NewEncoder(base64.StdEncoding, buf)
  51. io.WriteString(w, s)
  52. w.Close()
  53. } else {
  54. enc := make([]byte, 3)
  55. for i := 0; i < len(s); i++ {
  56. b := s[i]
  57. switch {
  58. case b == ' ':
  59. buf.WriteByte('_')
  60. case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
  61. buf.WriteByte(b)
  62. default:
  63. enc[0] = '='
  64. enc[1] = upperhex[b>>4]
  65. enc[2] = upperhex[b&0x0f]
  66. buf.Write(enc)
  67. }
  68. }
  69. }
  70. buf.WriteString("?=")
  71. return buf.String()
  72. }
  73. const upperhex = "0123456789ABCDEF"
  74. // A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
  75. type WordDecoder struct {
  76. // CharsetReader, if non-nil, defines a function to generate
  77. // charset-conversion readers, converting from the provided
  78. // charset into UTF-8.
  79. // Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
  80. // are handled by default.
  81. // One of the the CharsetReader's result values must be non-nil.
  82. CharsetReader func(charset string, input io.Reader) (io.Reader, error)
  83. }
  84. // Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
  85. // word is returned unchanged.
  86. func (d *WordDecoder) Decode(word string) (string, error) {
  87. fields := strings.Split(word, "?") // TODO: remove allocation?
  88. if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
  89. return "", errInvalidWord
  90. }
  91. content, err := decode(fields[2][0], fields[3])
  92. if err != nil {
  93. return "", err
  94. }
  95. buf := getBuffer()
  96. defer putBuffer(buf)
  97. if err := d.convert(buf, fields[1], content); err != nil {
  98. return "", err
  99. }
  100. return buf.String(), nil
  101. }
  102. // DecodeHeader decodes all encoded-words of the given string. It returns an
  103. // error if and only if CharsetReader of d returns an error.
  104. func (d *WordDecoder) DecodeHeader(header string) (string, error) {
  105. // If there is no encoded-word, returns before creating a buffer.
  106. i := strings.Index(header, "=?")
  107. if i == -1 {
  108. return header, nil
  109. }
  110. buf := getBuffer()
  111. defer putBuffer(buf)
  112. buf.WriteString(header[:i])
  113. header = header[i:]
  114. betweenWords := false
  115. for {
  116. start := strings.Index(header, "=?")
  117. if start == -1 {
  118. break
  119. }
  120. cur := start + len("=?")
  121. i := strings.Index(header[cur:], "?")
  122. if i == -1 {
  123. break
  124. }
  125. charset := header[cur : cur+i]
  126. cur += i + len("?")
  127. if len(header) < cur+len("Q??=") {
  128. break
  129. }
  130. encoding := header[cur]
  131. cur++
  132. if header[cur] != '?' {
  133. break
  134. }
  135. cur++
  136. j := strings.Index(header[cur:], "?=")
  137. if j == -1 {
  138. break
  139. }
  140. text := header[cur : cur+j]
  141. end := cur + j + len("?=")
  142. content, err := decode(encoding, text)
  143. if err != nil {
  144. betweenWords = false
  145. buf.WriteString(header[:start+2])
  146. header = header[start+2:]
  147. continue
  148. }
  149. // Write characters before the encoded-word. White-space and newline
  150. // characters separating two encoded-words must be deleted.
  151. if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
  152. buf.WriteString(header[:start])
  153. }
  154. if err := d.convert(buf, charset, content); err != nil {
  155. return "", err
  156. }
  157. header = header[end:]
  158. betweenWords = true
  159. }
  160. if len(header) > 0 {
  161. buf.WriteString(header)
  162. }
  163. return buf.String(), nil
  164. }
  165. func decode(encoding byte, text string) ([]byte, error) {
  166. switch encoding {
  167. case 'B', 'b':
  168. return base64.StdEncoding.DecodeString(text)
  169. case 'Q', 'q':
  170. return qDecode(text)
  171. }
  172. return nil, errInvalidWord
  173. }
  174. func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
  175. switch {
  176. case strings.EqualFold("utf-8", charset):
  177. buf.Write(content)
  178. case strings.EqualFold("iso-8859-1", charset):
  179. for _, c := range content {
  180. buf.WriteRune(rune(c))
  181. }
  182. case strings.EqualFold("us-ascii", charset):
  183. for _, c := range content {
  184. if c >= utf8.RuneSelf {
  185. buf.WriteRune(unicode.ReplacementChar)
  186. } else {
  187. buf.WriteByte(c)
  188. }
  189. }
  190. default:
  191. if d.CharsetReader == nil {
  192. return fmt.Errorf("mime: unhandled charset %q", charset)
  193. }
  194. r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
  195. if err != nil {
  196. return err
  197. }
  198. if _, err = buf.ReadFrom(r); err != nil {
  199. return err
  200. }
  201. }
  202. return nil
  203. }
  204. // hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
  205. // one byte of non-whitespace.
  206. func hasNonWhitespace(s string) bool {
  207. for _, b := range s {
  208. switch b {
  209. // Encoded-words can only be separated by linear white spaces which does
  210. // not include vertical tabs (\v).
  211. case ' ', '\t', '\n', '\r':
  212. default:
  213. return true
  214. }
  215. }
  216. return false
  217. }
  218. // qDecode decodes a Q encoded string.
  219. func qDecode(s string) ([]byte, error) {
  220. dec := make([]byte, len(s))
  221. n := 0
  222. for i := 0; i < len(s); i++ {
  223. switch c := s[i]; {
  224. case c == '_':
  225. dec[n] = ' '
  226. case c == '=':
  227. if i+2 >= len(s) {
  228. return nil, errInvalidWord
  229. }
  230. b, err := readHexByte(s[i+1], s[i+2])
  231. if err != nil {
  232. return nil, err
  233. }
  234. dec[n] = b
  235. i += 2
  236. case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
  237. dec[n] = c
  238. default:
  239. return nil, errInvalidWord
  240. }
  241. n++
  242. }
  243. return dec[:n], nil
  244. }