marks.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721
  1. // License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
  2. package hints
  3. import (
  4. "bytes"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "os/exec"
  9. "regexp"
  10. "slices"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "unicode"
  15. "unicode/utf8"
  16. "github.com/dlclark/regexp2"
  17. "github.com/seancfoley/ipaddress-go/ipaddr"
  18. "kitty"
  19. "kitty/tools/config"
  20. "kitty/tools/tty"
  21. "kitty/tools/utils"
  22. )
  23. var _ = fmt.Print
  24. const (
  25. DEFAULT_HINT_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
  26. FILE_EXTENSION = `\.(?:[a-zA-Z0-9]{2,7}|[ahcmo])(?:\b|[^.])`
  27. )
  28. func path_regex() string {
  29. return fmt.Sprintf(`(?:\S*?/[\r\S]+)|(?:\S[\r\S]*%s)\b`, FILE_EXTENSION)
  30. }
  31. func default_linenum_regex() string {
  32. return fmt.Sprintf(`(?P<path>%s):(?P<line>\d+)`, path_regex())
  33. }
  34. type Mark struct {
  35. Index int `json:"index"`
  36. Start int `json:"start"`
  37. End int `json:"end"`
  38. Text string `json:"text"`
  39. Group_id string `json:"group_id"`
  40. Is_hyperlink bool `json:"is_hyperlink"`
  41. Groupdict map[string]any `json:"groupdict"`
  42. }
  43. func process_escape_codes(text string) (ans string, hyperlinks []Mark) {
  44. removed_size, idx := 0, 0
  45. active_hyperlink_url := ""
  46. active_hyperlink_id := ""
  47. active_hyperlink_start_offset := 0
  48. add_hyperlink := func(end int) {
  49. hyperlinks = append(hyperlinks, Mark{
  50. Index: idx, Start: active_hyperlink_start_offset, End: end, Text: active_hyperlink_url, Is_hyperlink: true, Group_id: active_hyperlink_id})
  51. active_hyperlink_url, active_hyperlink_id = "", ""
  52. active_hyperlink_start_offset = 0
  53. idx++
  54. }
  55. ans = utils.ReplaceAll(utils.MustCompile("\x1b(?:\\[[0-9;:]*?m|\\].*?\x1b\\\\)"), text, func(raw string, groupdict map[string]utils.SubMatch) string {
  56. if !strings.HasPrefix(raw, "\x1b]8") {
  57. removed_size += len(raw)
  58. return ""
  59. }
  60. start := groupdict[""].Start - removed_size
  61. removed_size += len(raw)
  62. if active_hyperlink_url != "" {
  63. add_hyperlink(start)
  64. }
  65. raw = raw[4 : len(raw)-2]
  66. if metadata, url, found := strings.Cut(raw, ";"); found && url != "" {
  67. active_hyperlink_url = url
  68. active_hyperlink_start_offset = start
  69. if metadata != "" {
  70. for _, entry := range strings.Split(metadata, ":") {
  71. if strings.HasPrefix(entry, "id=") && len(entry) > 3 {
  72. active_hyperlink_id = entry[3:]
  73. }
  74. }
  75. }
  76. }
  77. return ""
  78. })
  79. if active_hyperlink_url != "" {
  80. add_hyperlink(len(ans))
  81. }
  82. return
  83. }
  84. type PostProcessorFunc = func(string, int, int) (int, int)
  85. type GroupProcessorFunc = func(map[string]string)
  86. func is_punctuation(b string) bool {
  87. switch b {
  88. case ",", ".", "?", "!":
  89. return true
  90. }
  91. return false
  92. }
  93. func closing_bracket_for(ch string) string {
  94. switch ch {
  95. case "(":
  96. return ")"
  97. case "[":
  98. return "]"
  99. case "{":
  100. return "}"
  101. case "<":
  102. return ">"
  103. case "*":
  104. return "*"
  105. case `"`:
  106. return `"`
  107. case "'":
  108. return "'"
  109. case "“":
  110. return "”"
  111. case "‘":
  112. return "’"
  113. }
  114. return ""
  115. }
  116. func char_at(s string, i int) string {
  117. ans, _ := utf8.DecodeRuneInString(s[i:])
  118. if ans == utf8.RuneError {
  119. return ""
  120. }
  121. return string(ans)
  122. }
  123. func matching_remover(openers ...string) PostProcessorFunc {
  124. return func(text string, s, e int) (int, int) {
  125. if s < e && e <= len(text) {
  126. before := char_at(text, s)
  127. if slices.Index(openers, before) > -1 {
  128. q := closing_bracket_for(before)
  129. if e > 0 && char_at(text, e-1) == q {
  130. s++
  131. e--
  132. } else if char_at(text, e) == q {
  133. s++
  134. }
  135. }
  136. }
  137. return s, e
  138. }
  139. }
  140. func linenum_group_processor(gd map[string]string) {
  141. pat := utils.MustCompile(`:\d+$`)
  142. gd[`path`] = pat.ReplaceAllStringFunc(gd["path"], func(m string) string {
  143. gd["line"] = m[1:]
  144. return ``
  145. })
  146. gd[`path`] = utils.Expanduser(gd[`path`])
  147. }
  148. var PostProcessorMap = sync.OnceValue(func() map[string]PostProcessorFunc {
  149. return map[string]PostProcessorFunc{
  150. "url": func(text string, s, e int) (int, int) {
  151. if s > 4 && text[s-5:s] == "link:" { // asciidoc URLs
  152. url := text[s:e]
  153. idx := strings.LastIndex(url, "[")
  154. if idx > -1 {
  155. e -= len(url) - idx
  156. }
  157. }
  158. for e > 1 && is_punctuation(char_at(text, e)) { // remove trailing punctuation
  159. e--
  160. }
  161. // truncate url at closing bracket/quote
  162. if s > 0 && e <= len(text) && closing_bracket_for(char_at(text, s-1)) != "" {
  163. q := closing_bracket_for(char_at(text, s-1))
  164. idx := strings.Index(text[s:], q)
  165. if idx > 0 {
  166. e = s + idx
  167. }
  168. }
  169. // reStructuredText URLs
  170. if e > 3 && text[e-2:e] == "`_" {
  171. e -= 2
  172. }
  173. return s, e
  174. },
  175. "brackets": matching_remover("(", "{", "[", "<"),
  176. "quotes": matching_remover("'", `"`, "“", "‘"),
  177. "ip": func(text string, s, e int) (int, int) {
  178. addr := ipaddr.NewHostName(text[s:e])
  179. if !addr.IsAddress() {
  180. return -1, -1
  181. }
  182. return s, e
  183. },
  184. }
  185. })
  186. type KittyOpts struct {
  187. Url_prefixes *utils.Set[string]
  188. Url_excluded_characters string
  189. Select_by_word_characters string
  190. }
  191. func read_relevant_kitty_opts() KittyOpts {
  192. ans := KittyOpts{
  193. Select_by_word_characters: kitty.KittyConfigDefaults.Select_by_word_characters,
  194. Url_excluded_characters: kitty.KittyConfigDefaults.Url_excluded_characters}
  195. handle_line := func(key, val string) error {
  196. switch key {
  197. case "url_prefixes":
  198. ans.Url_prefixes = utils.NewSetWithItems(strings.Split(val, " ")...)
  199. case "select_by_word_characters":
  200. ans.Select_by_word_characters = strings.TrimSpace(val)
  201. case "url_excluded_characters":
  202. if s, err := config.StringLiteral(val); err == nil {
  203. ans.Url_excluded_characters = s
  204. }
  205. }
  206. return nil
  207. }
  208. config.ReadKittyConfig(handle_line)
  209. if ans.Url_prefixes == nil {
  210. ans.Url_prefixes = utils.NewSetWithItems(kitty.KittyConfigDefaults.Url_prefixes...)
  211. }
  212. return ans
  213. }
  214. var RelevantKittyOpts = sync.OnceValue(func() KittyOpts {
  215. return read_relevant_kitty_opts()
  216. })
  217. var debugprintln = tty.DebugPrintln
  218. var _ = debugprintln
  219. func url_excluded_characters_as_ranges_for_regex(extra_excluded string) string {
  220. // See https://url.spec.whatwg.org/#url-code-points
  221. ans := strings.Builder{}
  222. ans.Grow(4096)
  223. type cr struct{ start, end rune }
  224. ranges := []cr{}
  225. r := func(start rune, end ...rune) {
  226. if len(end) == 0 {
  227. ranges = append(ranges, cr{start, start})
  228. } else {
  229. ranges = append(ranges, cr{start, end[0]})
  230. }
  231. }
  232. if !strings.Contains(extra_excluded, "\n") {
  233. r('\n')
  234. }
  235. if !strings.Contains(extra_excluded, "\r") {
  236. r('\r')
  237. }
  238. r('!')
  239. r('$')
  240. r('&')
  241. r('#')
  242. r('\'')
  243. r('/')
  244. r(':')
  245. r(';')
  246. r('@')
  247. r('_')
  248. r('~')
  249. r('(')
  250. r(')')
  251. r('*')
  252. r('+')
  253. r(',')
  254. r('-')
  255. r('.')
  256. r('=')
  257. r('?')
  258. r('%')
  259. r('a', 'z')
  260. r('A', 'Z')
  261. r('0', '9')
  262. slices.SortFunc(ranges, func(a, b cr) int { return int(a.start - b.start) })
  263. var prev rune = -1
  264. for _, cr := range ranges {
  265. if cr.start-1 > prev+1 {
  266. ans.WriteString(regexp.QuoteMeta(string(prev + 1)))
  267. ans.WriteRune('-')
  268. ans.WriteString(regexp.QuoteMeta(string(cr.start - 1)))
  269. }
  270. prev = cr.end
  271. }
  272. ans.WriteString(regexp.QuoteMeta(string(ranges[len(ranges)-1].end + 1)))
  273. ans.WriteRune('-')
  274. ans.WriteRune(0x9f)
  275. ans.WriteString(`\x{d800}-\x{dfff}`)
  276. ans.WriteString(`\x{fdd0}-\x{fdef}`)
  277. w := func(x rune) { ans.WriteRune(x) }
  278. w(0xFFFE)
  279. w(0xFFFF)
  280. w(0x1FFFE)
  281. w(0x1FFFF)
  282. w(0x2FFFE)
  283. w(0x2FFFF)
  284. w(0x3FFFE)
  285. w(0x3FFFF)
  286. w(0x4FFFE)
  287. w(0x4FFFF)
  288. w(0x5FFFE)
  289. w(0x5FFFF)
  290. w(0x6FFFE)
  291. w(0x6FFFF)
  292. w(0x7FFFE)
  293. w(0x7FFFF)
  294. w(0x8FFFE)
  295. w(0x8FFFF)
  296. w(0x9FFFE)
  297. w(0x9FFFF)
  298. w(0xAFFFE)
  299. w(0xAFFFF)
  300. w(0xBFFFE)
  301. w(0xBFFFF)
  302. w(0xCFFFE)
  303. w(0xCFFFF)
  304. w(0xDFFFE)
  305. w(0xDFFFF)
  306. w(0xEFFFE)
  307. w(0xEFFFF)
  308. w(0xFFFFE)
  309. w(0xFFFFF)
  310. if strings.Contains(extra_excluded, "-") {
  311. extra_excluded = strings.ReplaceAll(extra_excluded, "-", "")
  312. extra_excluded = regexp.QuoteMeta(extra_excluded) + "-"
  313. } else {
  314. extra_excluded = regexp.QuoteMeta(extra_excluded)
  315. }
  316. ans.WriteString(extra_excluded)
  317. return ans.String()
  318. }
  319. func functions_for(opts *Options) (pattern string, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, err error) {
  320. switch opts.Type {
  321. case "url":
  322. var url_prefixes *utils.Set[string]
  323. if opts.UrlPrefixes == "default" {
  324. url_prefixes = RelevantKittyOpts().Url_prefixes
  325. } else {
  326. url_prefixes = utils.NewSetWithItems(strings.Split(opts.UrlPrefixes, ",")...)
  327. }
  328. url_excluded_characters := RelevantKittyOpts().Url_excluded_characters
  329. if opts.UrlExcludedCharacters != "default" {
  330. if url_excluded_characters, err = config.StringLiteral(opts.UrlExcludedCharacters); err != nil {
  331. err = fmt.Errorf("Failed to parse --url-excluded-characters value: %#v with error: %w", opts.UrlExcludedCharacters, err)
  332. return
  333. }
  334. }
  335. pattern = fmt.Sprintf(`(?:%s)://[^%s]{3,}`, strings.Join(url_prefixes.AsSlice(), "|"), url_excluded_characters_as_ranges_for_regex(url_excluded_characters))
  336. post_processors = append(post_processors, PostProcessorMap()["url"])
  337. case "path":
  338. pattern = path_regex()
  339. post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
  340. case "line":
  341. pattern = "(?m)^\\s*(.+)[\\s\x00]*$"
  342. case "hash":
  343. pattern = "[0-9a-f][0-9a-f\r]{6,127}"
  344. case "ip":
  345. pattern = (
  346. // IPv4 with no validation
  347. `((?:\d{1,3}\.){3}\d{1,3}` + "|" +
  348. // IPv6 with no validation
  349. `(?:[a-fA-F0-9]{0,4}:){2,7}[a-fA-F0-9]{1,4})`)
  350. post_processors = append(post_processors, PostProcessorMap()["ip"])
  351. default:
  352. pattern = opts.Regex
  353. if opts.Type == "linenum" {
  354. if pattern == kitty.HintsDefaultRegex {
  355. pattern = default_linenum_regex()
  356. }
  357. post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
  358. group_processors = append(group_processors, linenum_group_processor)
  359. }
  360. }
  361. return
  362. }
  363. type Capture struct {
  364. Text string
  365. Text_as_runes []rune
  366. Byte_Offsets struct {
  367. Start, End int
  368. }
  369. Rune_Offsets struct {
  370. Start, End int
  371. }
  372. }
  373. func (self Capture) String() string {
  374. return fmt.Sprintf("Capture(start=%d, end=%d, %#v)", self.Byte_Offsets.Start, self.Byte_Offsets.End, self.Text)
  375. }
  376. type Group struct {
  377. Name string
  378. IsNamed bool
  379. Captures []Capture
  380. }
  381. func (self Group) LastCapture() Capture {
  382. if len(self.Captures) == 0 {
  383. return Capture{}
  384. }
  385. return self.Captures[len(self.Captures)-1]
  386. }
  387. func (self Group) String() string {
  388. return fmt.Sprintf("Group(name=%#v, captures=%v)", self.Name, self.Captures)
  389. }
  390. type Match struct {
  391. Groups []Group
  392. }
  393. func (self Match) HasNamedGroups() bool {
  394. for _, g := range self.Groups {
  395. if g.IsNamed {
  396. return true
  397. }
  398. }
  399. return false
  400. }
  401. func find_all_matches(re *regexp2.Regexp, text string) (ans []Match, err error) {
  402. m, err := re.FindStringMatch(text)
  403. if err != nil {
  404. return
  405. }
  406. rune_to_bytes := utils.RuneOffsetsToByteOffsets(text)
  407. get_byte_offset_map := func(groups []regexp2.Group) (ans map[int]int, err error) {
  408. ans = make(map[int]int, len(groups)*2)
  409. rune_offsets := make([]int, 0, len(groups)*2)
  410. for _, g := range groups {
  411. for _, c := range g.Captures {
  412. if _, found := ans[c.Index]; !found {
  413. rune_offsets = append(rune_offsets, c.Index)
  414. ans[c.Index] = -1
  415. }
  416. end := c.Index + c.Length
  417. if _, found := ans[end]; !found {
  418. rune_offsets = append(rune_offsets, end)
  419. ans[end] = -1
  420. }
  421. }
  422. }
  423. slices.Sort(rune_offsets)
  424. for _, pos := range rune_offsets {
  425. if ans[pos] = rune_to_bytes(pos); ans[pos] < 0 {
  426. return nil, fmt.Errorf("Matches are not monotonic cannot map rune offsets to byte offsets")
  427. }
  428. }
  429. return
  430. }
  431. for m != nil {
  432. groups := m.Groups()
  433. bom, err := get_byte_offset_map(groups)
  434. if err != nil {
  435. return nil, err
  436. }
  437. match := Match{Groups: make([]Group, len(groups))}
  438. for i, g := range m.Groups() {
  439. match.Groups[i].Name = g.Name
  440. match.Groups[i].IsNamed = g.Name != "" && g.Name != strconv.Itoa(i)
  441. for _, c := range g.Captures {
  442. cn := Capture{Text: c.String(), Text_as_runes: c.Runes()}
  443. cn.Rune_Offsets.End = c.Index + c.Length
  444. cn.Rune_Offsets.Start = c.Index
  445. cn.Byte_Offsets.Start, cn.Byte_Offsets.End = bom[c.Index], bom[cn.Rune_Offsets.End]
  446. match.Groups[i].Captures = append(match.Groups[i].Captures, cn)
  447. }
  448. }
  449. ans = append(ans, match)
  450. m, _ = re.FindNextMatch(m)
  451. }
  452. return
  453. }
  454. func mark(r *regexp2.Regexp, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, text string, opts *Options) (ans []Mark) {
  455. sanitize_pat := regexp.MustCompile("[\r\n\x00]")
  456. all_matches, _ := find_all_matches(r, text)
  457. for i, m := range all_matches {
  458. full_capture := m.Groups[0].LastCapture()
  459. match_start, match_end := full_capture.Byte_Offsets.Start, full_capture.Byte_Offsets.End
  460. for match_end > match_start+1 && text[match_end-1] == 0 {
  461. match_end--
  462. }
  463. full_match := text[match_start:match_end]
  464. if len([]rune(full_match)) < opts.MinimumMatchLength {
  465. continue
  466. }
  467. for _, f := range post_processors {
  468. match_start, match_end = f(text, match_start, match_end)
  469. if match_start < 0 {
  470. break
  471. }
  472. }
  473. if match_start < 0 {
  474. continue
  475. }
  476. full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
  477. gd := make(map[string]string, len(m.Groups))
  478. for idx, g := range m.Groups {
  479. if idx > 0 && g.IsNamed {
  480. c := g.LastCapture()
  481. if s, e := c.Byte_Offsets.Start, c.Byte_Offsets.End; s > -1 && e > -1 {
  482. s = max(s, match_start)
  483. e = min(e, match_end)
  484. gd[g.Name] = sanitize_pat.ReplaceAllLiteralString(text[s:e], "")
  485. }
  486. }
  487. }
  488. for _, f := range group_processors {
  489. f(gd)
  490. }
  491. gd2 := make(map[string]any, len(gd))
  492. for k, v := range gd {
  493. gd2[k] = v
  494. }
  495. if opts.Type == "regex" && len(m.Groups) > 1 && !m.HasNamedGroups() {
  496. cp := m.Groups[1].LastCapture()
  497. ms, me := cp.Byte_Offsets.Start, cp.Byte_Offsets.End
  498. match_start = max(match_start, ms)
  499. match_end = min(match_end, me)
  500. full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
  501. }
  502. if full_match != "" {
  503. ans = append(ans, Mark{
  504. Index: i, Start: match_start, End: match_end, Text: full_match, Groupdict: gd2,
  505. })
  506. }
  507. }
  508. return
  509. }
  510. type ErrNoMatches struct{ Type, Pattern string }
  511. func is_word_char(ch rune, current_chars []rune) bool {
  512. return unicode.IsLetter(ch) || unicode.IsNumber(ch) || (unicode.IsMark(ch) && len(current_chars) > 0 && unicode.IsLetter(current_chars[len(current_chars)-1]))
  513. }
  514. func mark_words(text string, opts *Options) (ans []Mark) {
  515. left := text
  516. var current_run struct {
  517. chars []rune
  518. start, size int
  519. }
  520. chars := opts.WordCharacters
  521. if chars == "" {
  522. chars = RelevantKittyOpts().Select_by_word_characters
  523. }
  524. allowed_chars := make(map[rune]bool, len(chars))
  525. for _, ch := range chars {
  526. allowed_chars[ch] = true
  527. }
  528. pos := 0
  529. post_processors := []PostProcessorFunc{PostProcessorMap()["brackets"], PostProcessorMap()["quotes"]}
  530. commit_run := func() {
  531. if len(current_run.chars) >= opts.MinimumMatchLength {
  532. match_start, match_end := current_run.start, current_run.start+current_run.size
  533. for _, f := range post_processors {
  534. match_start, match_end = f(text, match_start, match_end)
  535. if match_start < 0 {
  536. break
  537. }
  538. }
  539. if match_start > -1 && match_end > match_start {
  540. full_match := text[match_start:match_end]
  541. if len([]rune(full_match)) >= opts.MinimumMatchLength {
  542. ans = append(ans, Mark{
  543. Index: len(ans), Start: match_start, End: match_end, Text: full_match,
  544. })
  545. }
  546. }
  547. }
  548. current_run.chars = nil
  549. current_run.start = 0
  550. current_run.size = 0
  551. }
  552. for {
  553. ch, size := utf8.DecodeRuneInString(left)
  554. if ch == utf8.RuneError {
  555. break
  556. }
  557. if allowed_chars[ch] || is_word_char(ch, current_run.chars) {
  558. if len(current_run.chars) == 0 {
  559. current_run.start = pos
  560. }
  561. current_run.chars = append(current_run.chars, ch)
  562. current_run.size += size
  563. } else {
  564. commit_run()
  565. }
  566. left = left[size:]
  567. pos += size
  568. }
  569. commit_run()
  570. return
  571. }
  572. func adjust_python_offsets(text string, marks []Mark) error {
  573. // python returns rune based offsets (unicode chars not utf-8 bytes)
  574. adjust := utils.RuneOffsetsToByteOffsets(text)
  575. for i := range marks {
  576. mark := &marks[i]
  577. if mark.End < mark.Start {
  578. return fmt.Errorf("The end of a mark must not be before its start")
  579. }
  580. s, e := adjust(mark.Start), adjust(mark.End)
  581. if s < 0 || e < 0 {
  582. return fmt.Errorf("Overlapping marks are not supported")
  583. }
  584. mark.Start, mark.End = s, e
  585. }
  586. return nil
  587. }
  588. func (self *ErrNoMatches) Error() string {
  589. none_of := "matches"
  590. switch self.Type {
  591. case "urls":
  592. none_of = "URLs"
  593. case "hyperlinks":
  594. none_of = "hyperlinks"
  595. }
  596. if self.Pattern != "" {
  597. return fmt.Sprintf("No %s found with pattern: %s", none_of, self.Pattern)
  598. }
  599. return fmt.Sprintf("No %s found", none_of)
  600. }
  601. func find_marks(text string, opts *Options, cli_args ...string) (sanitized_text string, ans []Mark, index_map map[int]*Mark, err error) {
  602. sanitized_text, hyperlinks := process_escape_codes(text)
  603. used_pattern := ""
  604. run_basic_matching := func() error {
  605. pattern, post_processors, group_processors, err := functions_for(opts)
  606. if err != nil {
  607. return err
  608. }
  609. r, err := regexp2.Compile(pattern, regexp2.RE2)
  610. if err != nil {
  611. return fmt.Errorf("Failed to compile the regex pattern: %#v with error: %w", pattern, err)
  612. }
  613. ans = mark(r, post_processors, group_processors, sanitized_text, opts)
  614. used_pattern = pattern
  615. return nil
  616. }
  617. if opts.CustomizeProcessing != "" {
  618. cmd := exec.Command(utils.KittyExe(), append([]string{"+runpy", "from kittens.hints.main import custom_marking; custom_marking()"}, cli_args...)...)
  619. cmd.Stdin = strings.NewReader(sanitized_text)
  620. stdout, stderr := bytes.Buffer{}, bytes.Buffer{}
  621. cmd.Stdout, cmd.Stderr = &stdout, &stderr
  622. err = cmd.Run()
  623. if err != nil {
  624. var e *exec.ExitError
  625. if errors.As(err, &e) && e.ExitCode() == 2 {
  626. err = run_basic_matching()
  627. if err != nil {
  628. return
  629. }
  630. goto process_answer
  631. } else {
  632. return "", nil, nil, fmt.Errorf("Failed to run custom processor %#v with error: %w\n%s", opts.CustomizeProcessing, err, stderr.String())
  633. }
  634. }
  635. ans = make([]Mark, 0, 32)
  636. err = json.Unmarshal(stdout.Bytes(), &ans)
  637. if err != nil {
  638. return "", nil, nil, fmt.Errorf("Failed to load output from custom processor %#v with error: %w", opts.CustomizeProcessing, err)
  639. }
  640. err = adjust_python_offsets(sanitized_text, ans)
  641. if err != nil {
  642. return "", nil, nil, fmt.Errorf("Custom processor %#v produced invalid mark output with error: %w", opts.CustomizeProcessing, err)
  643. }
  644. } else if opts.Type == "hyperlink" {
  645. ans = hyperlinks
  646. } else if opts.Type == "word" {
  647. ans = mark_words(sanitized_text, opts)
  648. } else {
  649. err = run_basic_matching()
  650. if err != nil {
  651. return
  652. }
  653. }
  654. process_answer:
  655. if len(ans) == 0 {
  656. return "", nil, nil, &ErrNoMatches{Type: opts.Type, Pattern: used_pattern}
  657. }
  658. largest_index := ans[len(ans)-1].Index
  659. offset := max(0, opts.HintsOffset)
  660. index_map = make(map[int]*Mark, len(ans))
  661. for i := range ans {
  662. m := &ans[i]
  663. if opts.Ascending {
  664. m.Index += offset
  665. } else {
  666. m.Index = largest_index - m.Index + offset
  667. }
  668. index_map[m.Index] = m
  669. }
  670. return
  671. }