marks.go 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723
  1. // License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
  2. package hints
  3. import (
  4. "bytes"
  5. "encoding/json"
  6. "errors"
  7. "fmt"
  8. "os/exec"
  9. "path/filepath"
  10. "regexp"
  11. "slices"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "unicode"
  16. "unicode/utf8"
  17. "github.com/dlclark/regexp2"
  18. "github.com/seancfoley/ipaddress-go/ipaddr"
  19. "kitty"
  20. "kitty/tools/config"
  21. "kitty/tools/tty"
  22. "kitty/tools/utils"
  23. )
// Reference fmt unconditionally so the import above is always used.
var _ = fmt.Print
const (
	// DEFAULT_HINT_ALPHABET is the ten digits followed by the lowercase ASCII letters.
	// NOTE(review): presumably the characters hint labels are drawn from; it is
	// not referenced by the code visible in this file.
	DEFAULT_HINT_ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
	// FILE_EXTENSION is a regex fragment matching a dot, then either 2-7 ASCII
	// alphanumerics or one of the single letters a, h, c, m, o, followed by a
	// word boundary or any character other than a dot. path_regex embeds it.
	FILE_EXTENSION = `\.(?:[a-zA-Z0-9]{2,7}|[ahcmo])(?:\b|[^.])`
)
  29. func path_regex() string {
  30. return fmt.Sprintf(`(?:\S*?/[\r\S]+)|(?:\S[\r\S]*%s)\b`, FILE_EXTENSION)
  31. }
  32. func default_linenum_regex() string {
  33. return fmt.Sprintf(`(?P<path>%s):(?P<line>\d+)`, path_regex())
  34. }
// Mark describes one hint target located in the text.
type Mark struct {
	// Index is the ordinal of the mark; find_marks renumbers it before returning.
	Index int `json:"index"`
	// Start and End delimit the mark. The Go matchers in this file fill them
	// with byte offsets into the escape-code-free text; marks decoded from a
	// custom processor are converted to byte offsets by adjust_python_offsets.
	Start int `json:"start"`
	End   int `json:"end"`
	// Text is the matched text, or the URL for hyperlink marks.
	Text string `json:"text"`
	// Group_id holds the id= metadata value of an OSC 8 hyperlink, if any.
	Group_id string `json:"group_id"`
	// Is_hyperlink is set for marks produced from OSC 8 hyperlinks.
	Is_hyperlink bool `json:"is_hyperlink"`
	// Groupdict maps named regex groups to their captured values.
	Groupdict map[string]any `json:"groupdict"`
}
  44. func process_escape_codes(text string) (ans string, hyperlinks []Mark) {
  45. removed_size, idx := 0, 0
  46. active_hyperlink_url := ""
  47. active_hyperlink_id := ""
  48. active_hyperlink_start_offset := 0
  49. add_hyperlink := func(end int) {
  50. hyperlinks = append(hyperlinks, Mark{
  51. Index: idx, Start: active_hyperlink_start_offset, End: end, Text: active_hyperlink_url, Is_hyperlink: true, Group_id: active_hyperlink_id})
  52. active_hyperlink_url, active_hyperlink_id = "", ""
  53. active_hyperlink_start_offset = 0
  54. idx++
  55. }
  56. ans = utils.ReplaceAll(utils.MustCompile("\x1b(?:\\[[0-9;:]*?m|\\].*?\x1b\\\\)"), text, func(raw string, groupdict map[string]utils.SubMatch) string {
  57. if !strings.HasPrefix(raw, "\x1b]8") {
  58. removed_size += len(raw)
  59. return ""
  60. }
  61. start := groupdict[""].Start - removed_size
  62. removed_size += len(raw)
  63. if active_hyperlink_url != "" {
  64. add_hyperlink(start)
  65. }
  66. raw = raw[4 : len(raw)-2]
  67. if metadata, url, found := strings.Cut(raw, ";"); found && url != "" {
  68. active_hyperlink_url = url
  69. active_hyperlink_start_offset = start
  70. if metadata != "" {
  71. for _, entry := range strings.Split(metadata, ":") {
  72. if strings.HasPrefix(entry, "id=") && len(entry) > 3 {
  73. active_hyperlink_id = entry[3:]
  74. }
  75. }
  76. }
  77. }
  78. return ""
  79. })
  80. if active_hyperlink_url != "" {
  81. add_hyperlink(len(ans))
  82. }
  83. return
  84. }
// PostProcessorFunc receives the text plus the start and end offsets of a
// match and returns the adjusted offsets. mark discards a match when the
// returned start is negative.
type PostProcessorFunc = func(string, int, int) (int, int)

// GroupProcessorFunc adjusts, in place, the named-group values collected for a match.
type GroupProcessorFunc = func(map[string]string)
  87. func is_punctuation(b string) bool {
  88. switch b {
  89. case ",", ".", "?", "!":
  90. return true
  91. }
  92. return false
  93. }
  94. func closing_bracket_for(ch string) string {
  95. switch ch {
  96. case "(":
  97. return ")"
  98. case "[":
  99. return "]"
  100. case "{":
  101. return "}"
  102. case "<":
  103. return ">"
  104. case "*":
  105. return "*"
  106. case `"`:
  107. return `"`
  108. case "'":
  109. return "'"
  110. case "“":
  111. return "”"
  112. case "‘":
  113. return "’"
  114. }
  115. return ""
  116. }
  117. func char_at(s string, i int) string {
  118. ans, _ := utf8.DecodeRuneInString(s[i:])
  119. if ans == utf8.RuneError {
  120. return ""
  121. }
  122. return string(ans)
  123. }
  124. func matching_remover(openers ...string) PostProcessorFunc {
  125. return func(text string, s, e int) (int, int) {
  126. if s < e && e <= len(text) {
  127. before := char_at(text, s)
  128. if slices.Index(openers, before) > -1 {
  129. q := closing_bracket_for(before)
  130. if e > 0 && char_at(text, e-1) == q {
  131. s++
  132. e--
  133. } else if char_at(text, e) == q {
  134. s++
  135. }
  136. }
  137. }
  138. return s, e
  139. }
  140. }
  141. func linenum_group_processor(gd map[string]string) {
  142. pat := utils.MustCompile(`:\d+$`)
  143. gd[`path`] = pat.ReplaceAllStringFunc(gd["path"], func(m string) string {
  144. gd["line"] = m[1:]
  145. return ``
  146. })
  147. gd[`path`] = utils.Expanduser(gd[`path`])
  148. }
  149. var PostProcessorMap = sync.OnceValue(func() map[string]PostProcessorFunc {
  150. return map[string]PostProcessorFunc{
  151. "url": func(text string, s, e int) (int, int) {
  152. if s > 4 && text[s-5:s] == "link:" { // asciidoc URLs
  153. url := text[s:e]
  154. idx := strings.LastIndex(url, "[")
  155. if idx > -1 {
  156. e -= len(url) - idx
  157. }
  158. }
  159. for e > 1 && is_punctuation(char_at(text, e)) { // remove trailing punctuation
  160. e--
  161. }
  162. // truncate url at closing bracket/quote
  163. if s > 0 && e <= len(text) && closing_bracket_for(char_at(text, s-1)) != "" {
  164. q := closing_bracket_for(char_at(text, s-1))
  165. idx := strings.Index(text[s:], q)
  166. if idx > 0 {
  167. e = s + idx
  168. }
  169. }
  170. // reStructuredText URLs
  171. if e > 3 && text[e-2:e] == "`_" {
  172. e -= 2
  173. }
  174. return s, e
  175. },
  176. "brackets": matching_remover("(", "{", "[", "<"),
  177. "quotes": matching_remover("'", `"`, "“", "‘"),
  178. "ip": func(text string, s, e int) (int, int) {
  179. addr := ipaddr.NewHostName(text[s:e])
  180. if !addr.IsAddress() {
  181. return -1, -1
  182. }
  183. return s, e
  184. },
  185. }
  186. })
// KittyOpts holds the kitty.conf settings this file consults.
type KittyOpts struct {
	// Url_prefixes is the set of URL schemes used to build the URL pattern.
	Url_prefixes *utils.Set[string]
	// Url_excluded_characters lists extra characters that end a URL match.
	Url_excluded_characters string
	// Select_by_word_characters lists extra characters treated as part of a
	// word by mark_words.
	Select_by_word_characters string
}
  192. func read_relevant_kitty_opts(path string) KittyOpts {
  193. ans := KittyOpts{
  194. Select_by_word_characters: kitty.KittyConfigDefaults.Select_by_word_characters,
  195. Url_excluded_characters: kitty.KittyConfigDefaults.Url_excluded_characters}
  196. handle_line := func(key, val string) error {
  197. switch key {
  198. case "url_prefixes":
  199. ans.Url_prefixes = utils.NewSetWithItems(strings.Split(val, " ")...)
  200. case "select_by_word_characters":
  201. ans.Select_by_word_characters = strings.TrimSpace(val)
  202. case "url_excluded_characters":
  203. if s, err := config.StringLiteral(val); err == nil {
  204. ans.Url_excluded_characters = s
  205. }
  206. }
  207. return nil
  208. }
  209. cp := config.ConfigParser{LineHandler: handle_line}
  210. _ = cp.ParseFiles(path) // ignore errors and use defaults
  211. if ans.Url_prefixes == nil {
  212. ans.Url_prefixes = utils.NewSetWithItems(kitty.KittyConfigDefaults.Url_prefixes...)
  213. }
  214. return ans
  215. }
  216. var RelevantKittyOpts = sync.OnceValue(func() KittyOpts {
  217. return read_relevant_kitty_opts(filepath.Join(utils.ConfigDir(), "kitty.conf"))
  218. })
// debugprintln is a short alias for tty.DebugPrintln, kept for ad-hoc debugging.
var debugprintln = tty.DebugPrintln

// Reference the alias so it does not count as unused.
var _ = debugprintln
  221. func url_excluded_characters_as_ranges_for_regex(extra_excluded string) string {
  222. // See https://url.spec.whatwg.org/#url-code-points
  223. ans := strings.Builder{}
  224. ans.Grow(4096)
  225. type cr struct{ start, end rune }
  226. ranges := []cr{}
  227. r := func(start rune, end ...rune) {
  228. if len(end) == 0 {
  229. ranges = append(ranges, cr{start, start})
  230. } else {
  231. ranges = append(ranges, cr{start, end[0]})
  232. }
  233. }
  234. if !strings.Contains(extra_excluded, "\n") {
  235. r('\n')
  236. }
  237. if !strings.Contains(extra_excluded, "\r") {
  238. r('\r')
  239. }
  240. r('!')
  241. r('$')
  242. r('&')
  243. r('#')
  244. r('\'')
  245. r('/')
  246. r(':')
  247. r(';')
  248. r('@')
  249. r('_')
  250. r('~')
  251. r('(')
  252. r(')')
  253. r('*')
  254. r('+')
  255. r(',')
  256. r('-')
  257. r('.')
  258. r('=')
  259. r('?')
  260. r('%')
  261. r('a', 'z')
  262. r('A', 'Z')
  263. r('0', '9')
  264. slices.SortFunc(ranges, func(a, b cr) int { return int(a.start - b.start) })
  265. var prev rune = -1
  266. for _, cr := range ranges {
  267. if cr.start-1 > prev+1 {
  268. ans.WriteString(regexp.QuoteMeta(string(prev + 1)))
  269. ans.WriteRune('-')
  270. ans.WriteString(regexp.QuoteMeta(string(cr.start - 1)))
  271. }
  272. prev = cr.end
  273. }
  274. ans.WriteString(regexp.QuoteMeta(string(ranges[len(ranges)-1].end + 1)))
  275. ans.WriteRune('-')
  276. ans.WriteRune(0x9f)
  277. ans.WriteString(`\x{d800}-\x{dfff}`)
  278. ans.WriteString(`\x{fdd0}-\x{fdef}`)
  279. w := func(x rune) { ans.WriteRune(x) }
  280. w(0xFFFE)
  281. w(0xFFFF)
  282. w(0x1FFFE)
  283. w(0x1FFFF)
  284. w(0x2FFFE)
  285. w(0x2FFFF)
  286. w(0x3FFFE)
  287. w(0x3FFFF)
  288. w(0x4FFFE)
  289. w(0x4FFFF)
  290. w(0x5FFFE)
  291. w(0x5FFFF)
  292. w(0x6FFFE)
  293. w(0x6FFFF)
  294. w(0x7FFFE)
  295. w(0x7FFFF)
  296. w(0x8FFFE)
  297. w(0x8FFFF)
  298. w(0x9FFFE)
  299. w(0x9FFFF)
  300. w(0xAFFFE)
  301. w(0xAFFFF)
  302. w(0xBFFFE)
  303. w(0xBFFFF)
  304. w(0xCFFFE)
  305. w(0xCFFFF)
  306. w(0xDFFFE)
  307. w(0xDFFFF)
  308. w(0xEFFFE)
  309. w(0xEFFFF)
  310. w(0xFFFFE)
  311. w(0xFFFFF)
  312. if strings.Contains(extra_excluded, "-") {
  313. extra_excluded = strings.ReplaceAll(extra_excluded, "-", "")
  314. extra_excluded = regexp.QuoteMeta(extra_excluded) + "-"
  315. } else {
  316. extra_excluded = regexp.QuoteMeta(extra_excluded)
  317. }
  318. ans.WriteString(extra_excluded)
  319. return ans.String()
  320. }
  321. func functions_for(opts *Options) (pattern string, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, err error) {
  322. switch opts.Type {
  323. case "url":
  324. var url_prefixes *utils.Set[string]
  325. if opts.UrlPrefixes == "default" {
  326. url_prefixes = RelevantKittyOpts().Url_prefixes
  327. } else {
  328. url_prefixes = utils.NewSetWithItems(strings.Split(opts.UrlPrefixes, ",")...)
  329. }
  330. url_excluded_characters := RelevantKittyOpts().Url_excluded_characters
  331. if opts.UrlExcludedCharacters != "default" {
  332. if url_excluded_characters, err = config.StringLiteral(opts.UrlExcludedCharacters); err != nil {
  333. err = fmt.Errorf("Failed to parse --url-excluded-characters value: %#v with error: %w", opts.UrlExcludedCharacters, err)
  334. return
  335. }
  336. }
  337. pattern = fmt.Sprintf(`(?:%s)://[^%s]{3,}`, strings.Join(url_prefixes.AsSlice(), "|"), url_excluded_characters_as_ranges_for_regex(url_excluded_characters))
  338. post_processors = append(post_processors, PostProcessorMap()["url"])
  339. case "path":
  340. pattern = path_regex()
  341. post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
  342. case "line":
  343. pattern = "(?m)^\\s*(.+)[\\s\x00]*$"
  344. case "hash":
  345. pattern = "[0-9a-f][0-9a-f\r]{6,127}"
  346. case "ip":
  347. pattern = (
  348. // IPv4 with no validation
  349. `((?:\d{1,3}\.){3}\d{1,3}` + "|" +
  350. // IPv6 with no validation
  351. `(?:[a-fA-F0-9]{0,4}:){2,7}[a-fA-F0-9]{1,4})`)
  352. post_processors = append(post_processors, PostProcessorMap()["ip"])
  353. default:
  354. pattern = opts.Regex
  355. if opts.Type == "linenum" {
  356. if pattern == kitty.HintsDefaultRegex {
  357. pattern = default_linenum_regex()
  358. }
  359. post_processors = append(post_processors, PostProcessorMap()["brackets"], PostProcessorMap()["quotes"])
  360. group_processors = append(group_processors, linenum_group_processor)
  361. }
  362. }
  363. return
  364. }
  365. type Capture struct {
  366. Text string
  367. Text_as_runes []rune
  368. Byte_Offsets struct {
  369. Start, End int
  370. }
  371. Rune_Offsets struct {
  372. Start, End int
  373. }
  374. }
  375. func (self Capture) String() string {
  376. return fmt.Sprintf("Capture(start=%d, end=%d, %#v)", self.Byte_Offsets.Start, self.Byte_Offsets.End, self.Text)
  377. }
  378. type Group struct {
  379. Name string
  380. IsNamed bool
  381. Captures []Capture
  382. }
  383. func (self Group) LastCapture() Capture {
  384. if len(self.Captures) == 0 {
  385. return Capture{}
  386. }
  387. return self.Captures[len(self.Captures)-1]
  388. }
  389. func (self Group) String() string {
  390. return fmt.Sprintf("Group(name=%#v, captures=%v)", self.Name, self.Captures)
  391. }
  392. type Match struct {
  393. Groups []Group
  394. }
  395. func (self Match) HasNamedGroups() bool {
  396. for _, g := range self.Groups {
  397. if g.IsNamed {
  398. return true
  399. }
  400. }
  401. return false
  402. }
  403. func find_all_matches(re *regexp2.Regexp, text string) (ans []Match, err error) {
  404. m, err := re.FindStringMatch(text)
  405. if err != nil {
  406. return
  407. }
  408. rune_to_bytes := utils.RuneOffsetsToByteOffsets(text)
  409. get_byte_offset_map := func(groups []regexp2.Group) (ans map[int]int, err error) {
  410. ans = make(map[int]int, len(groups)*2)
  411. rune_offsets := make([]int, 0, len(groups)*2)
  412. for _, g := range groups {
  413. for _, c := range g.Captures {
  414. if _, found := ans[c.Index]; !found {
  415. rune_offsets = append(rune_offsets, c.Index)
  416. ans[c.Index] = -1
  417. }
  418. end := c.Index + c.Length
  419. if _, found := ans[end]; !found {
  420. rune_offsets = append(rune_offsets, end)
  421. ans[end] = -1
  422. }
  423. }
  424. }
  425. slices.Sort(rune_offsets)
  426. for _, pos := range rune_offsets {
  427. if ans[pos] = rune_to_bytes(pos); ans[pos] < 0 {
  428. return nil, fmt.Errorf("Matches are not monotonic cannot map rune offsets to byte offsets")
  429. }
  430. }
  431. return
  432. }
  433. for m != nil {
  434. groups := m.Groups()
  435. bom, err := get_byte_offset_map(groups)
  436. if err != nil {
  437. return nil, err
  438. }
  439. match := Match{Groups: make([]Group, len(groups))}
  440. for i, g := range m.Groups() {
  441. match.Groups[i].Name = g.Name
  442. match.Groups[i].IsNamed = g.Name != "" && g.Name != strconv.Itoa(i)
  443. for _, c := range g.Captures {
  444. cn := Capture{Text: c.String(), Text_as_runes: c.Runes()}
  445. cn.Rune_Offsets.End = c.Index + c.Length
  446. cn.Rune_Offsets.Start = c.Index
  447. cn.Byte_Offsets.Start, cn.Byte_Offsets.End = bom[c.Index], bom[cn.Rune_Offsets.End]
  448. match.Groups[i].Captures = append(match.Groups[i].Captures, cn)
  449. }
  450. }
  451. ans = append(ans, match)
  452. m, _ = re.FindNextMatch(m)
  453. }
  454. return
  455. }
  456. func mark(r *regexp2.Regexp, post_processors []PostProcessorFunc, group_processors []GroupProcessorFunc, text string, opts *Options) (ans []Mark) {
  457. sanitize_pat := regexp.MustCompile("[\r\n\x00]")
  458. all_matches, _ := find_all_matches(r, text)
  459. for i, m := range all_matches {
  460. full_capture := m.Groups[0].LastCapture()
  461. match_start, match_end := full_capture.Byte_Offsets.Start, full_capture.Byte_Offsets.End
  462. for match_end > match_start+1 && text[match_end-1] == 0 {
  463. match_end--
  464. }
  465. full_match := text[match_start:match_end]
  466. if len([]rune(full_match)) < opts.MinimumMatchLength {
  467. continue
  468. }
  469. for _, f := range post_processors {
  470. match_start, match_end = f(text, match_start, match_end)
  471. if match_start < 0 {
  472. break
  473. }
  474. }
  475. if match_start < 0 {
  476. continue
  477. }
  478. full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
  479. gd := make(map[string]string, len(m.Groups))
  480. for idx, g := range m.Groups {
  481. if idx > 0 && g.IsNamed {
  482. c := g.LastCapture()
  483. if s, e := c.Byte_Offsets.Start, c.Byte_Offsets.End; s > -1 && e > -1 {
  484. s = max(s, match_start)
  485. e = min(e, match_end)
  486. gd[g.Name] = sanitize_pat.ReplaceAllLiteralString(text[s:e], "")
  487. }
  488. }
  489. }
  490. for _, f := range group_processors {
  491. f(gd)
  492. }
  493. gd2 := make(map[string]any, len(gd))
  494. for k, v := range gd {
  495. gd2[k] = v
  496. }
  497. if opts.Type == "regex" && len(m.Groups) > 1 && !m.HasNamedGroups() {
  498. cp := m.Groups[1].LastCapture()
  499. ms, me := cp.Byte_Offsets.Start, cp.Byte_Offsets.End
  500. match_start = max(match_start, ms)
  501. match_end = min(match_end, me)
  502. full_match = sanitize_pat.ReplaceAllLiteralString(text[match_start:match_end], "")
  503. }
  504. if full_match != "" {
  505. ans = append(ans, Mark{
  506. Index: i, Start: match_start, End: match_end, Text: full_match, Groupdict: gd2,
  507. })
  508. }
  509. }
  510. return
  511. }
// ErrNoMatches is the error returned by find_marks when nothing was found.
// Type is the kind of hint that was searched for and Pattern the regex that
// was used, if any.
type ErrNoMatches struct{ Type, Pattern string }
  513. func is_word_char(ch rune, current_chars []rune) bool {
  514. return unicode.IsLetter(ch) || unicode.IsNumber(ch) || (unicode.IsMark(ch) && len(current_chars) > 0 && unicode.IsLetter(current_chars[len(current_chars)-1]))
  515. }
  516. func mark_words(text string, opts *Options) (ans []Mark) {
  517. left := text
  518. var current_run struct {
  519. chars []rune
  520. start, size int
  521. }
  522. chars := opts.WordCharacters
  523. if chars == "" {
  524. chars = RelevantKittyOpts().Select_by_word_characters
  525. }
  526. allowed_chars := make(map[rune]bool, len(chars))
  527. for _, ch := range chars {
  528. allowed_chars[ch] = true
  529. }
  530. pos := 0
  531. post_processors := []PostProcessorFunc{PostProcessorMap()["brackets"], PostProcessorMap()["quotes"]}
  532. commit_run := func() {
  533. if len(current_run.chars) >= opts.MinimumMatchLength {
  534. match_start, match_end := current_run.start, current_run.start+current_run.size
  535. for _, f := range post_processors {
  536. match_start, match_end = f(text, match_start, match_end)
  537. if match_start < 0 {
  538. break
  539. }
  540. }
  541. if match_start > -1 && match_end > match_start {
  542. full_match := text[match_start:match_end]
  543. if len([]rune(full_match)) >= opts.MinimumMatchLength {
  544. ans = append(ans, Mark{
  545. Index: len(ans), Start: match_start, End: match_end, Text: full_match,
  546. })
  547. }
  548. }
  549. }
  550. current_run.chars = nil
  551. current_run.start = 0
  552. current_run.size = 0
  553. }
  554. for {
  555. ch, size := utf8.DecodeRuneInString(left)
  556. if ch == utf8.RuneError {
  557. break
  558. }
  559. if allowed_chars[ch] || is_word_char(ch, current_run.chars) {
  560. if len(current_run.chars) == 0 {
  561. current_run.start = pos
  562. }
  563. current_run.chars = append(current_run.chars, ch)
  564. current_run.size += size
  565. } else {
  566. commit_run()
  567. }
  568. left = left[size:]
  569. pos += size
  570. }
  571. commit_run()
  572. return
  573. }
  574. func adjust_python_offsets(text string, marks []Mark) error {
  575. // python returns rune based offsets (unicode chars not utf-8 bytes)
  576. adjust := utils.RuneOffsetsToByteOffsets(text)
  577. for i := range marks {
  578. mark := &marks[i]
  579. if mark.End < mark.Start {
  580. return fmt.Errorf("The end of a mark must not be before its start")
  581. }
  582. s, e := adjust(mark.Start), adjust(mark.End)
  583. if s < 0 || e < 0 {
  584. return fmt.Errorf("Overlapping marks are not supported")
  585. }
  586. mark.Start, mark.End = s, e
  587. }
  588. return nil
  589. }
  590. func (self *ErrNoMatches) Error() string {
  591. none_of := "matches"
  592. switch self.Type {
  593. case "urls":
  594. none_of = "URLs"
  595. case "hyperlinks":
  596. none_of = "hyperlinks"
  597. }
  598. if self.Pattern != "" {
  599. return fmt.Sprintf("No %s found with pattern: %s", none_of, self.Pattern)
  600. }
  601. return fmt.Sprintf("No %s found", none_of)
  602. }
// find_marks locates all hint targets in text according to opts.
//
// Escape codes are stripped first; sanitized_text is the result and all mark
// offsets refer to it. Marks are then produced by one of:
//   - a custom processor (opts.CustomizeProcessing set): kitty is run with
//     cli_args, receives the sanitized text on stdin and prints the marks as
//     JSON; exit code 2 means "fall back to the built-in matching";
//   - the OSC 8 hyperlinks found while stripping (type "hyperlink");
//   - word splitting (type "word");
//   - regex matching as configured by functions_for (everything else).
//
// If nothing is found an *ErrNoMatches is returned. Otherwise every mark's
// Index is renumbered and index_map maps each final Index to its mark in ans.
func find_marks(text string, opts *Options, cli_args ...string) (sanitized_text string, ans []Mark, index_map map[int]*Mark, err error) {
	sanitized_text, hyperlinks := process_escape_codes(text)
	used_pattern := ""
	// run_basic_matching performs the built-in regex matching, storing the
	// marks in ans and the pattern in used_pattern.
	run_basic_matching := func() error {
		pattern, post_processors, group_processors, err := functions_for(opts)
		if err != nil {
			return err
		}
		r, err := regexp2.Compile(pattern, regexp2.RE2)
		if err != nil {
			return fmt.Errorf("Failed to compile the regex pattern: %#v with error: %w", pattern, err)
		}
		ans = mark(r, post_processors, group_processors, sanitized_text, opts)
		used_pattern = pattern
		return nil
	}
	if opts.CustomizeProcessing != "" {
		cmd := exec.Command(utils.KittyExe(), append([]string{"+runpy", "from kittens.hints.main import custom_marking; custom_marking()"}, cli_args...)...)
		cmd.Stdin = strings.NewReader(sanitized_text)
		stdout, stderr := bytes.Buffer{}, bytes.Buffer{}
		cmd.Stdout, cmd.Stderr = &stdout, &stderr
		err = cmd.Run()
		if err != nil {
			var e *exec.ExitError
			if errors.As(err, &e) && e.ExitCode() == 2 {
				// Exit code 2: use the built-in matching instead.
				err = run_basic_matching()
				if err != nil {
					// Naked return: sanitized_text and err are set, the rest is zero.
					return
				}
				goto process_answer
			} else {
				return "", nil, nil, fmt.Errorf("Failed to run custom processor %#v with error: %w\n%s", opts.CustomizeProcessing, err, stderr.String())
			}
		}
		ans = make([]Mark, 0, 32)
		err = json.Unmarshal(stdout.Bytes(), &ans)
		if err != nil {
			return "", nil, nil, fmt.Errorf("Failed to load output from custom processor %#v with error: %w", opts.CustomizeProcessing, err)
		}
		// The custom processor reports character offsets; convert to bytes.
		err = adjust_python_offsets(sanitized_text, ans)
		if err != nil {
			return "", nil, nil, fmt.Errorf("Custom processor %#v produced invalid mark output with error: %w", opts.CustomizeProcessing, err)
		}
	} else if opts.Type == "hyperlink" {
		ans = hyperlinks
	} else if opts.Type == "word" {
		ans = mark_words(sanitized_text, opts)
	} else {
		err = run_basic_matching()
		if err != nil {
			return
		}
	}
process_answer:
	if len(ans) == 0 {
		return "", nil, nil, &ErrNoMatches{Type: opts.Type, Pattern: used_pattern}
	}
	// The last mark is taken to carry the largest index.
	largest_index := ans[len(ans)-1].Index
	// Negative hint offsets are treated as zero.
	offset := max(0, opts.HintsOffset)
	index_map = make(map[int]*Mark, len(ans))
	for i := range ans {
		m := &ans[i]
		if opts.Ascending {
			m.Index += offset
		} else {
			// Reverse the numbering so the last mark receives the lowest index.
			m.Index = largest_index - m.Index + offset
		}
		index_map[m.Index] = m
	}
	return
}