escape-code-parser.go 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. // License: GPLv3 Copyright: 2022, Kovid Goyal, <kovid at kovidgoyal.net>
  2. package wcswidth
  3. import (
  4. "bytes"
  5. "fmt"
  6. "kitty/tools/utils"
  7. )
  // Reference fmt so that its import is always in use.
  var _ = fmt.Print

  type (
  	// parser_state identifies which part of the input stream the parser is
  	// currently inside.
  	parser_state uint8
  	// csi_state records how far into a CSI escape code the parser is.
  	csi_state uint8
  	// csi_char_type is the classification that csi_type assigns to a byte.
  	csi_char_type uint8
  )

  // bracketed_paste_start is the CSI payload ("200~") that puts the parser into
  // the bracketed_paste state instead of being reported via HandleCSI.
  var bracketed_paste_start = []byte("200~")

  // Values of parser_state.
  const (
  	// normal: plain text, decoded as UTF-8.
  	normal parser_state = iota
  	// esc: an ESC (0x1b) was seen, the next byte selects the escape code type.
  	esc
  	// csi: collecting the bytes of a CSI escape code.
  	csi
  	// st: collecting a string payload that ends with a string terminator.
  	st
  	// st_or_bel: like st, but a BEL (0x07) byte also ends the payload.
  	st_or_bel
  	// esc_st: an ESC was seen inside a string payload; a following '\' ends it.
  	esc_st
  	// c1_st: a 0xc2 byte was seen inside a string payload; a following 0x9c ends it.
  	c1_st
  	// bracketed_paste: text between the paste start and paste end markers.
  	bracketed_paste
  )

  // Values of csi_state.
  const (
  	// parameter: only parameter bytes have been seen so far.
  	parameter csi_state = iota
  	// intermediate: at least one intermediate byte has been seen.
  	intermediate
  )

  // Values of csi_char_type, see csi_type for the byte ranges.
  const (
  	unknown_csi_char csi_char_type = iota
  	parameter_csi_char
  	intermediate_csi_char
  	final_csi_char
  )
  33. type EscapeCodeParser struct {
  34. state parser_state
  35. utf8_state, utf8_codep utils.UTF8State
  36. csi_state csi_state
  37. current_buffer []byte
  38. bracketed_paste_buffer []utils.UTF8State
  39. current_callback func([]byte) error
  40. ReplaceInvalidUtf8Bytes bool
  41. // Callbacks
  42. HandleRune func(rune) error
  43. HandleEndOfBracketedPaste func() error
  44. HandleCSI func([]byte) error
  45. HandleOSC func([]byte) error
  46. HandleDCS func([]byte) error
  47. HandlePM func([]byte) error
  48. HandleSOS func([]byte) error
  49. HandleAPC func([]byte) error
  50. }
  51. func (self *EscapeCodeParser) InBracketedPaste() bool { return self.state == bracketed_paste }
  52. func (self *EscapeCodeParser) ParseString(s string) error {
  53. return self.Parse(utils.UnsafeStringToBytes(s))
  54. }
  55. func (self *EscapeCodeParser) ParseByte(b byte) error {
  56. switch self.state {
  57. case normal, bracketed_paste:
  58. prev_utf8_state := self.utf8_state
  59. switch utils.DecodeUtf8(&self.utf8_state, &self.utf8_codep, b) {
  60. case utils.UTF8_ACCEPT:
  61. err := self.dispatch_char(self.utf8_codep)
  62. if err != nil {
  63. self.reset_state()
  64. return err
  65. }
  66. case utils.UTF8_REJECT:
  67. self.utf8_state = utils.UTF8_ACCEPT
  68. if prev_utf8_state != utils.UTF8_ACCEPT {
  69. // reparse this byte with state set to UTF8_ACCEPT
  70. return self.ParseByte(b)
  71. }
  72. if self.ReplaceInvalidUtf8Bytes {
  73. err := self.dispatch_char(utils.UTF8State(0xfffd))
  74. if err != nil {
  75. return err
  76. }
  77. }
  78. }
  79. default:
  80. err := self.dispatch_byte(b)
  81. if err != nil {
  82. self.reset_state()
  83. return err
  84. }
  85. }
  86. return nil
  87. }
  88. func (self *EscapeCodeParser) Parse(data []byte) error {
  89. for _, b := range data {
  90. err := self.ParseByte(b)
  91. if err != nil {
  92. return err
  93. }
  94. }
  95. return nil
  96. }
  97. func (self *EscapeCodeParser) Reset() {
  98. self.reset_state()
  99. }
  100. func (self *EscapeCodeParser) write_ch(ch byte) {
  101. self.current_buffer = append(self.current_buffer, ch)
  102. }
  103. func csi_type(ch byte) csi_char_type {
  104. if (0x30 <= ch && ch <= 0x3f) || ch == '-' {
  105. return parameter_csi_char
  106. }
  107. if 0x40 <= ch && ch <= 0x7E {
  108. return final_csi_char
  109. }
  110. if 0x20 <= ch && ch <= 0x2F {
  111. return intermediate_csi_char
  112. }
  113. return unknown_csi_char
  114. }
  115. func (self *EscapeCodeParser) reset_state() {
  116. self.current_buffer = self.current_buffer[:0]
  117. self.bracketed_paste_buffer = self.bracketed_paste_buffer[:0]
  118. self.state = normal
  119. self.utf8_state = utils.UTF8_ACCEPT
  120. self.utf8_codep = utils.UTF8_ACCEPT
  121. self.current_callback = nil
  122. self.csi_state = parameter
  123. }
  124. func (self *EscapeCodeParser) dispatch_esc_code() error {
  125. if self.state == csi && bytes.Equal(self.current_buffer, bracketed_paste_start) {
  126. self.reset_state()
  127. self.state = bracketed_paste
  128. return nil
  129. }
  130. var err error
  131. if self.current_callback != nil {
  132. err = self.current_callback(self.current_buffer)
  133. }
  134. self.reset_state()
  135. return err
  136. }
  137. func (self *EscapeCodeParser) invalid_escape_code() {
  138. self.reset_state()
  139. }
  140. func (self *EscapeCodeParser) dispatch_rune(ch utils.UTF8State) error {
  141. if self.HandleRune != nil {
  142. return self.HandleRune(rune(ch))
  143. }
  144. return nil
  145. }
  146. func (self *EscapeCodeParser) bp_buffer_equals(chars []utils.UTF8State) bool {
  147. if len(self.bracketed_paste_buffer) != len(chars) {
  148. return false
  149. }
  150. for i, q := range chars {
  151. if self.bracketed_paste_buffer[i] != q {
  152. return false
  153. }
  154. }
  155. return true
  156. }
  157. func (self *EscapeCodeParser) dispatch_char(ch utils.UTF8State) error {
  158. if self.state == bracketed_paste {
  159. dispatch := func() error {
  160. if len(self.bracketed_paste_buffer) > 0 {
  161. for _, c := range self.bracketed_paste_buffer {
  162. err := self.dispatch_rune(c)
  163. if err != nil {
  164. return err
  165. }
  166. }
  167. self.bracketed_paste_buffer = self.bracketed_paste_buffer[:0]
  168. }
  169. return self.dispatch_rune(ch)
  170. }
  171. handle_ch := func(chars ...utils.UTF8State) error {
  172. if self.bp_buffer_equals(chars) {
  173. self.bracketed_paste_buffer = append(self.bracketed_paste_buffer, ch)
  174. if self.bracketed_paste_buffer[len(self.bracketed_paste_buffer)-1] == '~' {
  175. self.reset_state()
  176. if self.HandleEndOfBracketedPaste != nil {
  177. if err := self.HandleEndOfBracketedPaste(); err != nil {
  178. return err
  179. }
  180. }
  181. }
  182. return nil
  183. } else {
  184. return dispatch()
  185. }
  186. }
  187. switch ch {
  188. case 0x1b:
  189. return handle_ch()
  190. case '[':
  191. return handle_ch(0x1b)
  192. case '2':
  193. return handle_ch(0x1b, '[')
  194. case '0':
  195. return handle_ch(0x1b, '[', '2')
  196. case '1':
  197. return handle_ch(0x1b, '[', '2', '0')
  198. case '~':
  199. return handle_ch(0x1b, '[', '2', '0', '1')
  200. default:
  201. return dispatch()
  202. }
  203. } // end self.state == bracketed_paste
  204. switch ch {
  205. case 0x1b:
  206. self.state = esc
  207. case 0x90:
  208. self.state = st
  209. self.current_callback = self.HandleDCS
  210. case 0x9b:
  211. self.state = csi
  212. self.current_callback = self.HandleCSI
  213. case 0x9d:
  214. self.state = st_or_bel
  215. self.current_callback = self.HandleOSC
  216. case 0x98:
  217. self.state = st
  218. self.current_callback = self.HandleSOS
  219. case 0x9e:
  220. self.state = st
  221. self.current_callback = self.HandlePM
  222. case 0x9f:
  223. self.state = st
  224. self.current_callback = self.HandleAPC
  225. default:
  226. return self.dispatch_rune(ch)
  227. }
  228. return nil
  229. }
  230. func (self *EscapeCodeParser) dispatch_byte(ch byte) error {
  231. switch self.state {
  232. case esc:
  233. switch ch {
  234. case 'P':
  235. self.state = st
  236. self.current_callback = self.HandleDCS
  237. case '[':
  238. self.state = csi
  239. self.csi_state = parameter
  240. self.current_callback = self.HandleCSI
  241. case ']':
  242. self.state = st_or_bel
  243. self.current_callback = self.HandleOSC
  244. case '^':
  245. self.state = st
  246. self.current_callback = self.HandlePM
  247. case '_':
  248. self.state = st
  249. self.current_callback = self.HandleAPC
  250. case 'D', 'E', 'H', 'M', 'N', 'O', 'Z', '6', '7', '8', '9', '=', '>', 'F', 'c', 'l', 'm', 'n', 'o', '|', '}', '~':
  251. default:
  252. // we drop this dangling Esc and reparse the byte after the esc
  253. self.reset_state()
  254. return self.ParseByte(ch)
  255. }
  256. case csi:
  257. self.write_ch(ch)
  258. switch self.csi_state {
  259. case parameter:
  260. switch csi_type(ch) {
  261. case intermediate_csi_char:
  262. self.csi_state = intermediate
  263. case final_csi_char:
  264. return self.dispatch_esc_code()
  265. case unknown_csi_char:
  266. self.invalid_escape_code()
  267. }
  268. case intermediate:
  269. switch csi_type(ch) {
  270. case parameter_csi_char, unknown_csi_char:
  271. self.invalid_escape_code()
  272. case final_csi_char:
  273. return self.dispatch_esc_code()
  274. }
  275. }
  276. case st_or_bel:
  277. if ch == 0x7 {
  278. return self.dispatch_esc_code()
  279. }
  280. fallthrough
  281. case st:
  282. if ch == 0x1b {
  283. self.state = esc_st
  284. } else if ch == 0xc2 {
  285. self.state = c1_st
  286. } else {
  287. self.write_ch(ch)
  288. }
  289. case esc_st:
  290. if ch == '\\' {
  291. return self.dispatch_esc_code()
  292. } else {
  293. self.state = st
  294. self.write_ch(0x1b)
  295. if ch != 0x1b {
  296. self.write_ch(ch)
  297. }
  298. }
  299. case c1_st:
  300. if ch == 0x9c {
  301. return self.dispatch_esc_code()
  302. } else {
  303. self.state = st
  304. self.write_ch(0xc2)
  305. self.write_ch(ch)
  306. }
  307. }
  308. return nil
  309. }