tabwriter.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package tabwriter implements a write filter (tabwriter.Writer) that
  5. // translates tabbed columns in input into properly aligned text.
  6. //
  7. // The package is using the Elastic Tabstops algorithm described at
  8. // http://nickgravgaard.com/elastictabstops/index.html.
  9. //
  10. package tabwriter
  11. import (
  12. "bytes"
  13. "io"
  14. "unicode/utf8"
  15. )
  16. // ----------------------------------------------------------------------------
  17. // Filter implementation
  // cell is the bookkeeping record for one segment of text that ends at a
// tab or a line break. The text itself is stored in a separate buffer; a
// cell only records how large the segment is and how it was terminated.
type cell struct {
	size, width int  // size: segment length in bytes; width: segment length in runes
	htab        bool // set if the segment was terminated by an htab ('\t')
}
  28. // A Writer is a filter that inserts padding around tab-delimited
  29. // columns in its input to align them in the output.
  30. //
  31. // The Writer treats incoming bytes as UTF-8 encoded text consisting
  32. // of cells terminated by (horizontal or vertical) tabs or line
  33. // breaks (newline or formfeed characters). Cells in adjacent lines
  34. // constitute a column. The Writer inserts padding as needed to
  35. // make all cells in a column have the same width, effectively
  36. // aligning the columns. It assumes that all characters have the
  37. // same width except for tabs for which a tabwidth must be specified.
  38. // Note that cells are tab-terminated, not tab-separated: trailing
  39. // non-tab text at the end of a line does not form a column cell.
  40. //
  41. // The Writer assumes that all Unicode code points have the same width;
  42. // this may not be true in some fonts.
  43. //
  44. // If DiscardEmptyColumns is set, empty columns that are terminated
  45. // entirely by vertical (or "soft") tabs are discarded. Columns
  46. // terminated by horizontal (or "hard") tabs are not affected by
  47. // this flag.
  48. //
  49. // If a Writer is configured to filter HTML, HTML tags and entities
  50. // are passed through. The widths of tags and entities are
  51. // assumed to be zero (tags) and one (entities) for formatting purposes.
  52. //
  53. // A segment of text may be escaped by bracketing it with Escape
  54. // characters. The tabwriter passes escaped text segments through
  55. // unchanged. In particular, it does not interpret any tabs or line
  56. // breaks within the segment. If the StripEscape flag is set, the
  57. // Escape characters are stripped from the output; otherwise they
  58. // are passed through as well. For the purpose of formatting, the
  59. // width of the escaped text is always computed excluding the Escape
  60. // characters.
  61. //
  62. // The formfeed character ('\f') acts like a newline but it also
  63. // terminates all columns in the current line (effectively calling
  64. // Flush). Cells in the next line start new columns. Unless found
  65. // inside an HTML tag or inside an escaped text segment, formfeed
  66. // characters appear as newlines in the output.
  67. //
  68. // The Writer must buffer input internally, because proper spacing
  69. // of one line may depend on the cells in future lines. Clients must
  70. // call Flush when done calling Write.
  71. //
  72. type Writer struct {
  73. // configuration
  74. output io.Writer
  75. minwidth int
  76. tabwidth int
  77. padding int
  78. padbytes [8]byte
  79. flags uint
  80. // current state
  81. buf bytes.Buffer // collected text excluding tabs or line breaks
  82. pos int // buffer position up to which cell.width of incomplete cell has been computed
  83. cell cell // current incomplete cell; cell.width is up to buf[pos] excluding ignored sections
  84. endChar byte // terminating char of escaped sequence (Escape for escapes, '>', ';' for HTML tags/entities, or 0)
  85. lines [][]cell // list of lines; each line is a list of cells
  86. widths []int // list of column widths in runes - re-used during formatting
  87. }
  88. func (b *Writer) addLine() { b.lines = append(b.lines, []cell{}) }
  89. // Reset the current state.
  90. func (b *Writer) reset() {
  91. b.buf.Reset()
  92. b.pos = 0
  93. b.cell = cell{}
  94. b.endChar = 0
  95. b.lines = b.lines[0:0]
  96. b.widths = b.widths[0:0]
  97. b.addLine()
  98. }
  99. // Internal representation (current state):
  100. //
  101. // - all text written is appended to buf; tabs and line breaks are stripped away
  102. // - at any given time there is a (possibly empty) incomplete cell at the end
  103. // (the cell starts after a tab or line break)
  104. // - cell.size is the number of bytes belonging to the cell so far
  105. // - cell.width is text width in runes of that cell from the start of the cell to
  106. // position pos; html tags and entities are excluded from this width if html
  107. // filtering is enabled
  108. // - the sizes and widths of processed text are kept in the lines list
  109. // which contains a list of cells for each line
  110. // - the widths list is a temporary list with current widths used during
  111. // formatting; it is kept in Writer because it's re-used
  112. //
  //                    |<---------- size ---------->|
  //                    |                            |
  //                    |<- width ->|<- ignored ->|  |
  //                    |           |             |  |
  // [---processed---tab------------<tag>...</tag>...]
  // ^                  ^                         ^
  // |                  |                         |
  // buf                start of incomplete cell  pos
  // These flags control the formatting performed by a Writer.
const (
	// FilterHTML ignores html tags and treats entities (starting with '&'
	// and ending in ';') as single characters (width = 1).
	FilterHTML uint = 1 << iota

	// StripEscape strips the Escape characters bracketing escaped text
	// segments instead of passing them through unchanged with the text.
	StripEscape

	// AlignRight forces right-alignment of cell content.
	// The default is left-alignment.
	AlignRight

	// DiscardEmptyColumns handles empty columns as if they were not
	// present in the input in the first place.
	DiscardEmptyColumns

	// TabIndent always uses tabs for indentation columns (i.e., padding
	// of leading empty cells on the left) independent of padchar.
	TabIndent

	// Debug prints a vertical bar ('|') between columns (after
	// formatting). Discarded columns appear as zero-width columns ("||").
	Debug
)
  142. // A Writer must be initialized with a call to Init. The first parameter (output)
  143. // specifies the filter output. The remaining parameters control the formatting:
  144. //
  145. // minwidth minimal cell width including any padding
  146. // tabwidth width of tab characters (equivalent number of spaces)
  147. // padding padding added to a cell before computing its width
  148. // padchar ASCII char used for padding
  149. // if padchar == '\t', the Writer will assume that the
  150. // width of a '\t' in the formatted output is tabwidth,
  151. // and cells are left-aligned independent of align_left
  152. // (for correct-looking results, tabwidth must correspond
  153. // to the tab width in the viewer displaying the result)
  154. // flags formatting control
  155. //
  156. func (b *Writer) Init(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
  157. if minwidth < 0 || tabwidth < 0 || padding < 0 {
  158. panic("negative minwidth, tabwidth, or padding")
  159. }
  160. b.output = output
  161. b.minwidth = minwidth
  162. b.tabwidth = tabwidth
  163. b.padding = padding
  164. for i := range b.padbytes {
  165. b.padbytes[i] = padchar
  166. }
  167. if padchar == '\t' {
  168. // tab padding enforces left-alignment
  169. flags &^= AlignRight
  170. }
  171. b.flags = flags
  172. b.reset()
  173. return b
  174. }
  175. // debugging support (keep code around)
  176. func (b *Writer) dump() {
  177. pos := 0
  178. for i, line := range b.lines {
  179. print("(", i, ") ")
  180. for _, c := range line {
  181. print("[", string(b.buf.Bytes()[pos:pos+c.size]), "]")
  182. pos += c.size
  183. }
  184. print("\n")
  185. }
  186. print("\n")
  187. }
  // osError wraps an error that should be reported to the caller as an
// ordinary error value. Panicking with this wrapper lets the recovery code
// tell such errors apart from genuine panics, which must not be turned
// into returned errors.
type osError struct {
	err error // the wrapped error
}
  193. func (b *Writer) write0(buf []byte) {
  194. n, err := b.output.Write(buf)
  195. if n != len(buf) && err == nil {
  196. err = io.ErrShortWrite
  197. }
  198. if err != nil {
  199. panic(osError{err})
  200. }
  201. }
  202. func (b *Writer) writeN(src []byte, n int) {
  203. for n > len(src) {
  204. b.write0(src)
  205. n -= len(src)
  206. }
  207. b.write0(src[0:n])
  208. }
  // Fixed byte sequences written to the output.
var (
	newline = []byte{'\n'}                                         // line terminator
	tabs    = []byte{'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'} // run of eight tabs used for tab padding
)
  213. func (b *Writer) writePadding(textw, cellw int, useTabs bool) {
  214. if b.padbytes[0] == '\t' || useTabs {
  215. // padding is done with tabs
  216. if b.tabwidth == 0 {
  217. return // tabs have no width - can't do any padding
  218. }
  219. // make cellw the smallest multiple of b.tabwidth
  220. cellw = (cellw + b.tabwidth - 1) / b.tabwidth * b.tabwidth
  221. n := cellw - textw // amount of padding
  222. if n < 0 {
  223. panic("internal error")
  224. }
  225. b.writeN(tabs, (n+b.tabwidth-1)/b.tabwidth)
  226. return
  227. }
  228. // padding is done with non-tab characters
  229. b.writeN(b.padbytes[0:], cellw-textw)
  230. }
  // vbar is the column separator written between cells when Debug is set.
var vbar = []byte{'|'}
  232. func (b *Writer) writeLines(pos0 int, line0, line1 int) (pos int) {
  233. pos = pos0
  234. for i := line0; i < line1; i++ {
  235. line := b.lines[i]
  236. // if TabIndent is set, use tabs to pad leading empty cells
  237. useTabs := b.flags&TabIndent != 0
  238. for j, c := range line {
  239. if j > 0 && b.flags&Debug != 0 {
  240. // indicate column break
  241. b.write0(vbar)
  242. }
  243. if c.size == 0 {
  244. // empty cell
  245. if j < len(b.widths) {
  246. b.writePadding(c.width, b.widths[j], useTabs)
  247. }
  248. } else {
  249. // non-empty cell
  250. useTabs = false
  251. if b.flags&AlignRight == 0 { // align left
  252. b.write0(b.buf.Bytes()[pos : pos+c.size])
  253. pos += c.size
  254. if j < len(b.widths) {
  255. b.writePadding(c.width, b.widths[j], false)
  256. }
  257. } else { // align right
  258. if j < len(b.widths) {
  259. b.writePadding(c.width, b.widths[j], false)
  260. }
  261. b.write0(b.buf.Bytes()[pos : pos+c.size])
  262. pos += c.size
  263. }
  264. }
  265. }
  266. if i+1 == len(b.lines) {
  267. // last buffered line - we don't have a newline, so just write
  268. // any outstanding buffered data
  269. b.write0(b.buf.Bytes()[pos : pos+b.cell.size])
  270. pos += b.cell.size
  271. } else {
  272. // not the last line - write newline
  273. b.write0(newline)
  274. }
  275. }
  276. return
  277. }
  278. // Format the text between line0 and line1 (excluding line1); pos
  279. // is the buffer position corresponding to the beginning of line0.
  280. // Returns the buffer position corresponding to the beginning of
  281. // line1 and an error, if any.
  282. //
  283. func (b *Writer) format(pos0 int, line0, line1 int) (pos int) {
  284. pos = pos0
  285. column := len(b.widths)
  286. for this := line0; this < line1; this++ {
  287. line := b.lines[this]
  288. if column < len(line)-1 {
  289. // cell exists in this column => this line
  290. // has more cells than the previous line
  291. // (the last cell per line is ignored because cells are
  292. // tab-terminated; the last cell per line describes the
  293. // text before the newline/formfeed and does not belong
  294. // to a column)
  295. // print unprinted lines until beginning of block
  296. pos = b.writeLines(pos, line0, this)
  297. line0 = this
  298. // column block begin
  299. width := b.minwidth // minimal column width
  300. discardable := true // true if all cells in this column are empty and "soft"
  301. for ; this < line1; this++ {
  302. line = b.lines[this]
  303. if column < len(line)-1 {
  304. // cell exists in this column
  305. c := line[column]
  306. // update width
  307. if w := c.width + b.padding; w > width {
  308. width = w
  309. }
  310. // update discardable
  311. if c.width > 0 || c.htab {
  312. discardable = false
  313. }
  314. } else {
  315. break
  316. }
  317. }
  318. // column block end
  319. // discard empty columns if necessary
  320. if discardable && b.flags&DiscardEmptyColumns != 0 {
  321. width = 0
  322. }
  323. // format and print all columns to the right of this column
  324. // (we know the widths of this column and all columns to the left)
  325. b.widths = append(b.widths, width) // push width
  326. pos = b.format(pos, line0, this)
  327. b.widths = b.widths[0 : len(b.widths)-1] // pop width
  328. line0 = this
  329. }
  330. }
  331. // print unprinted lines until end
  332. return b.writeLines(pos, line0, line1)
  333. }
  334. // Append text to current cell.
  335. func (b *Writer) append(text []byte) {
  336. b.buf.Write(text)
  337. b.cell.size += len(text)
  338. }
  339. // Update the cell width.
  340. func (b *Writer) updateWidth() {
  341. b.cell.width += utf8.RuneCount(b.buf.Bytes()[b.pos:b.buf.Len()])
  342. b.pos = b.buf.Len()
  343. }
  // Escape brackets a text segment that is to be escaped. For instance, the
// tab in the string "Ignore this tab: \xff\t\xff" does not terminate a
// cell and constitutes a single character of width one for formatting
// purposes.
//
// The value 0xff was chosen because it cannot appear in a valid UTF-8
// sequence.
const Escape = '\xff'
  352. // Start escaped mode.
  353. func (b *Writer) startEscape(ch byte) {
  354. switch ch {
  355. case Escape:
  356. b.endChar = Escape
  357. case '<':
  358. b.endChar = '>'
  359. case '&':
  360. b.endChar = ';'
  361. }
  362. }
  363. // Terminate escaped mode. If the escaped text was an HTML tag, its width
  364. // is assumed to be zero for formatting purposes; if it was an HTML entity,
  365. // its width is assumed to be one. In all other cases, the width is the
  366. // unicode width of the text.
  367. //
  368. func (b *Writer) endEscape() {
  369. switch b.endChar {
  370. case Escape:
  371. b.updateWidth()
  372. if b.flags&StripEscape == 0 {
  373. b.cell.width -= 2 // don't count the Escape chars
  374. }
  375. case '>': // tag of zero width
  376. case ';':
  377. b.cell.width++ // entity, count as one rune
  378. }
  379. b.pos = b.buf.Len()
  380. b.endChar = 0
  381. }
  382. // Terminate the current cell by adding it to the list of cells of the
  383. // current line. Returns the number of cells in that line.
  384. //
  385. func (b *Writer) terminateCell(htab bool) int {
  386. b.cell.htab = htab
  387. line := &b.lines[len(b.lines)-1]
  388. *line = append(*line, b.cell)
  389. b.cell = cell{}
  390. return len(*line)
  391. }
  392. func handlePanic(err *error, op string) {
  393. if e := recover(); e != nil {
  394. if nerr, ok := e.(osError); ok {
  395. *err = nerr.err
  396. return
  397. }
  398. panic("tabwriter: panic during " + op)
  399. }
  400. }
  401. // Flush should be called after the last call to Write to ensure
  402. // that any data buffered in the Writer is written to output. Any
  403. // incomplete escape sequence at the end is considered
  404. // complete for formatting purposes.
  405. //
  406. func (b *Writer) Flush() (err error) {
  407. defer b.reset() // even in the presence of errors
  408. defer handlePanic(&err, "Flush")
  409. // add current cell if not empty
  410. if b.cell.size > 0 {
  411. if b.endChar != 0 {
  412. // inside escape - terminate it even if incomplete
  413. b.endEscape()
  414. }
  415. b.terminateCell(false)
  416. }
  417. // format contents of buffer
  418. b.format(0, 0, len(b.lines))
  419. return
  420. }
  // hbar is the section break written after a formfeed when Debug is set.
var hbar = []byte("---\n")
  422. // Write writes buf to the writer b.
  423. // The only errors returned are ones encountered
  424. // while writing to the underlying output stream.
  425. //
  426. func (b *Writer) Write(buf []byte) (n int, err error) {
  427. defer handlePanic(&err, "Write")
  428. // split text into cells
  429. n = 0
  430. for i, ch := range buf {
  431. if b.endChar == 0 {
  432. // outside escape
  433. switch ch {
  434. case '\t', '\v', '\n', '\f':
  435. // end of cell
  436. b.append(buf[n:i])
  437. b.updateWidth()
  438. n = i + 1 // ch consumed
  439. ncells := b.terminateCell(ch == '\t')
  440. if ch == '\n' || ch == '\f' {
  441. // terminate line
  442. b.addLine()
  443. if ch == '\f' || ncells == 1 {
  444. // A '\f' always forces a flush. Otherwise, if the previous
  445. // line has only one cell which does not have an impact on
  446. // the formatting of the following lines (the last cell per
  447. // line is ignored by format()), thus we can flush the
  448. // Writer contents.
  449. if err = b.Flush(); err != nil {
  450. return
  451. }
  452. if ch == '\f' && b.flags&Debug != 0 {
  453. // indicate section break
  454. b.write0(hbar)
  455. }
  456. }
  457. }
  458. case Escape:
  459. // start of escaped sequence
  460. b.append(buf[n:i])
  461. b.updateWidth()
  462. n = i
  463. if b.flags&StripEscape != 0 {
  464. n++ // strip Escape
  465. }
  466. b.startEscape(Escape)
  467. case '<', '&':
  468. // possibly an html tag/entity
  469. if b.flags&FilterHTML != 0 {
  470. // begin of tag/entity
  471. b.append(buf[n:i])
  472. b.updateWidth()
  473. n = i
  474. b.startEscape(ch)
  475. }
  476. }
  477. } else {
  478. // inside escape
  479. if ch == b.endChar {
  480. // end of tag/entity
  481. j := i + 1
  482. if ch == Escape && b.flags&StripEscape != 0 {
  483. j = i // strip Escape
  484. }
  485. b.append(buf[n:j])
  486. n = i + 1 // ch consumed
  487. b.endEscape()
  488. }
  489. }
  490. }
  491. // append leftover text
  492. b.append(buf[n:])
  493. n = len(buf)
  494. return
  495. }
  496. // NewWriter allocates and initializes a new tabwriter.Writer.
  497. // The parameters are the same as for the Init function.
  498. //
  499. func NewWriter(output io.Writer, minwidth, tabwidth, padding int, padchar byte, flags uint) *Writer {
  500. return new(Writer).Init(output, minwidth, tabwidth, padding, padchar, flags)
  501. }