shlex.go 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
/*
Package shlex implements a simple lexer which splits input into tokens using
shell-style rules for quoting.

The basic use case splits a string into sub-strings:

	shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

To process the words of a string one at a time:

	l := shlex.NewLexer("one \"two three\" four")
	for {
		word := l.Next()
		if word.Err != nil || word.Value == "" {
			break
		}
		// process word.Value
	}
*/
  12. package shlex
  13. import (
  14. "fmt"
  15. "kitty/tools/utils"
  16. "strings"
  17. "unicode/utf8"
  18. )
  // Word is one result of a call to Lexer.Next: either a word of the input, the
  // end-of-input marker (empty Value, nil Err) or an error report.
  type Word struct {
  	// Value is the text of the word. It is empty if EOF is reached.
  	Value string
  	// Pos is the position in the input string of the word or of the trailer.
  	Pos int
  	// Err indicates an error (unterminated string or trailing unescaped
  	// backslash).
  	Err error
  	// Trailer is extra trailing data such as an unterminated string or an
  	// unescaped backslash. Present only if Err != nil.
  	Trailer string
  }
  // lexer_state identifies which state the Lexer state machine is in.
  type lexer_state int

  // Lexer state machine states.
  const (
  	// lex_normal is the initial state: between words, whitespace is skipped.
  	lex_normal lexer_state = iota
  	// word means the text of a word is being accumulated outside of quotes.
  	word
  	// string_without_escapes means the lexer is inside a single-quoted
  	// string, where every byte up to the closing quote is literal.
  	string_without_escapes
  	// string_with_escapes means the lexer is inside a double-quoted string,
  	// where a backslash escapes the character that follows it.
  	string_with_escapes
  )
  33. // Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
  34. type Lexer struct {
  35. state lexer_state
  36. src string
  37. src_sz, src_pos, word_start int
  38. buf strings.Builder
  39. }
  40. // NewLexer creates a new lexer from an input string.
  41. func NewLexer(x string) *Lexer {
  42. return &Lexer{src: x, src_sz: len(x)}
  43. }
  44. func (self *Lexer) start_word() {
  45. self.buf.Reset()
  46. self.word_start = self.src_pos - 1
  47. }
  48. func (self *Lexer) get_word() Word {
  49. return Word{Pos: self.word_start, Value: self.buf.String()}
  50. }
  51. func (self *Lexer) write_ch(ch byte) {
  52. self.buf.WriteByte(ch)
  53. }
  54. func (self *Lexer) write_escaped_ch() bool {
  55. ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
  56. if count > 0 {
  57. self.src_pos += count
  58. if ch != utf8.RuneError {
  59. self.buf.WriteRune(ch)
  60. }
  61. return true
  62. }
  63. return false
  64. }
  65. // Next returns the next word. At EOF Word.Value will be ""
  66. func (self *Lexer) Next() (ans Word) {
  67. const string_with_escapes_delim = '"'
  68. const string_without_escapes_delim = '\''
  69. const escape_char = '\\'
  70. for self.src_pos < self.src_sz {
  71. ch := self.src[self.src_pos]
  72. self.src_pos++
  73. switch self.state {
  74. case lex_normal:
  75. switch ch {
  76. case ' ', '\n', '\r', '\t':
  77. case string_with_escapes_delim:
  78. self.state = string_with_escapes
  79. self.start_word()
  80. case string_without_escapes_delim:
  81. self.state = string_without_escapes
  82. self.start_word()
  83. case escape_char:
  84. self.start_word()
  85. if !self.write_escaped_ch() {
  86. ans.Trailer = "\\"
  87. ans.Err = fmt.Errorf("Extra backslash at end of input")
  88. ans.Pos = self.word_start
  89. return
  90. }
  91. self.state = word
  92. default:
  93. self.state = word
  94. self.start_word()
  95. self.write_ch(ch)
  96. }
  97. case word:
  98. switch ch {
  99. case ' ', '\n', '\r', '\t':
  100. self.state = lex_normal
  101. if self.buf.Len() > 0 {
  102. return self.get_word()
  103. }
  104. case string_with_escapes_delim:
  105. self.state = string_with_escapes
  106. case string_without_escapes_delim:
  107. self.state = string_without_escapes
  108. case escape_char:
  109. if !self.write_escaped_ch() {
  110. ans.Pos = self.word_start
  111. ans.Trailer = self.buf.String() + "\\"
  112. ans.Err = fmt.Errorf("Extra backslash at end of input")
  113. return
  114. }
  115. default:
  116. self.write_ch(ch)
  117. }
  118. case string_without_escapes:
  119. switch ch {
  120. case string_without_escapes_delim:
  121. self.state = word
  122. default:
  123. self.write_ch(ch)
  124. }
  125. case string_with_escapes:
  126. switch ch {
  127. case string_with_escapes_delim:
  128. self.state = word
  129. case escape_char:
  130. self.write_escaped_ch()
  131. default:
  132. self.write_ch(ch)
  133. }
  134. }
  135. }
  136. switch self.state {
  137. case word:
  138. self.state = lex_normal
  139. if self.buf.Len() > 0 {
  140. return self.get_word()
  141. }
  142. case string_with_escapes, string_without_escapes:
  143. self.state = lex_normal
  144. ans.Trailer = self.buf.String()
  145. ans.Pos = self.word_start
  146. ans.Err = fmt.Errorf("Unterminated string at end of input")
  147. return
  148. case lex_normal:
  149. }
  150. return
  151. }
  152. // Split partitions a string into a slice of strings.
  153. func Split(s string) (ans []string, err error) {
  154. l := NewLexer(s)
  155. var word Word
  156. for {
  157. word = l.Next()
  158. if word.Err != nil {
  159. return ans, word.Err
  160. }
  161. if word.Value == "" {
  162. break
  163. }
  164. ans = append(ans, word.Value)
  165. }
  166. return
  167. }
  168. func Quote(s string) string {
  169. if s == "" {
  170. return s
  171. }
  172. if utils.MustCompile(`[^\w@%+=:,./-]`).MatchString(s) {
  173. return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'"
  174. }
  175. return s
  176. }
  177. // SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
  178. // more relaxed about errors and also adding an empty string at the end if s ends with a Space.
  179. func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
  180. t := NewLexer(s)
  181. argv = make([]string, 0, len(s)/4)
  182. for {
  183. word := t.Next()
  184. if word.Value == "" {
  185. if word.Trailer == "" {
  186. trimmed := strings.TrimRight(s, " ")
  187. if len(trimmed) < len(s) { // trailing spaces
  188. pos := position_of_last_arg
  189. if len(argv) > 0 {
  190. pos += len(argv[len(argv)-1])
  191. }
  192. if pos < len(s) { // trailing whitespace
  193. argv = append(argv, "")
  194. position_of_last_arg += len(s) - pos + 1
  195. }
  196. }
  197. } else {
  198. argv = append(argv, word.Trailer)
  199. position_of_last_arg = word.Pos
  200. }
  201. break
  202. }
  203. position_of_last_arg = word.Pos
  204. argv = append(argv, word.Value)
  205. }
  206. return
  207. }