123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234 |
- // License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
- package subseq
- import (
- "fmt"
- "slices"
- "strings"
- "kitty/tools/utils"
- "kitty/tools/utils/images"
- )
- var _ = fmt.Print
- const (
- LEVEL1 = "/"
- LEVEL2 = "-_0123456789"
- LEVEL3 = "."
- )
- type resolved_options_type struct {
- level1, level2, level3 []rune
- }
- type Options struct {
- Level1, Level2, Level3 string
- NumberOfThreads int
- }
- type Match struct {
- Positions []int
- Score float64
- idx int
- Text string
- }
- func level_factor_for(current_lcase, last_lcase, current_cased, last_cased rune, opts *resolved_options_type) int {
- switch {
- case slices.Contains(opts.level1, last_lcase):
- return 90
- case slices.Contains(opts.level2, last_lcase):
- return 80
- case last_lcase == last_cased && current_lcase != current_cased: // camelCase
- return 80
- case slices.Contains(opts.level3, last_lcase):
- return 70
- default:
- return 0
- }
- }
- type workspace_type struct {
- positions [][]int // positions of each needle char in haystack
- level_factors []int
- address []int
- max_score_per_char float64
- }
- func (w *workspace_type) initialize(haystack_sz, needle_sz int) {
- if cap(w.positions) < needle_sz {
- w.positions = make([][]int, needle_sz)
- } else {
- w.positions = w.positions[:needle_sz]
- }
- if cap(w.level_factors) < haystack_sz {
- w.level_factors = make([]int, 2*haystack_sz)
- } else {
- w.level_factors = w.level_factors[:haystack_sz]
- }
- for i, s := range w.positions {
- if cap(s) < haystack_sz {
- w.positions[i] = make([]int, 0, 2*haystack_sz)
- } else {
- w.positions[i] = w.positions[i][:0]
- }
- }
- if cap(w.address) < needle_sz {
- w.address = make([]int, needle_sz)
- }
- w.address = utils.Memset(w.address)
- }
- func (w *workspace_type) position(x int) int { // the position of xth needle char in the haystack for the current address
- return w.positions[x][w.address[x]]
- }
- func (w *workspace_type) increment_address() bool {
- pos := len(w.positions) - 1 // the last needle char
- for {
- w.address[pos]++
- if w.address[pos] < len(w.positions[pos]) {
- return true
- }
- if pos == 0 {
- break
- }
- w.address[pos] = 0
- pos--
- }
- return false
- }
- func (w *workspace_type) address_is_monotonic() bool {
- // Check if the character positions pointed to by the current address are monotonic
- for i := 1; i < len(w.positions); i++ {
- if w.position(i) <= w.position(i-1) {
- return false
- }
- }
- return true
- }
- func (w *workspace_type) calc_score() (ans float64) {
- distance, pos := 0, 0
- for i := 0; i < len(w.positions); i++ {
- pos = w.position(i)
- if i == 0 {
- distance = pos + 1
- } else {
- distance = pos - w.position(i-1)
- if distance < 2 {
- ans += w.max_score_per_char // consecutive chars
- continue
- }
- }
- if w.level_factors[pos] > 0 {
- ans += (100.0 * w.max_score_per_char) / float64(w.level_factors[pos]) // at a special location
- } else {
- ans += (0.75 * w.max_score_per_char) / float64(distance)
- }
- }
- return
- }
- func has_atleast_one_match(w *workspace_type) (found bool) {
- p := -1
- for i := 0; i < len(w.positions); i++ {
- if len(w.positions[i]) == 0 { // all chars of needle not in haystack
- return false
- }
- found = false
- for _, pos := range w.positions[i] {
- if pos > p {
- p = pos
- found = true
- break
- }
- }
- if !found { // chars of needle not present in sequence in haystack
- return false
- }
- }
- return true
- }
- func score_item(item string, idx int, needle []rune, opts *resolved_options_type, w *workspace_type) *Match {
- ans := &Match{idx: idx, Text: item, Positions: make([]int, len(needle))}
- haystack := []rune(strings.ToLower(item))
- orig_haystack := []rune(item)
- w.initialize(len(orig_haystack), len(needle))
- for i := 0; i < len(haystack); i++ {
- level_factor_calculated := false
- for j := 0; j < len(needle); j++ {
- if needle[j] == haystack[i] {
- if !level_factor_calculated {
- level_factor_calculated = true
- if i > 0 {
- w.level_factors[i] = level_factor_for(haystack[i], haystack[i-1], orig_haystack[i], orig_haystack[i-1], opts)
- }
- }
- w.positions[j] = append(w.positions[j], i)
- }
- }
- }
- w.max_score_per_char = (1.0/float64(len(orig_haystack)) + 1.0/float64(len(needle))) / 2.0
- if !has_atleast_one_match(w) {
- return ans
- }
- var score float64
- for {
- if w.address_is_monotonic() {
- score = w.calc_score()
- if score > ans.Score {
- ans.Score = score
- for i := range ans.Positions {
- ans.Positions[i] = w.position(i)
- }
- }
- }
- if !w.increment_address() {
- break
- }
- }
- if ans.Score > 0 {
- adjust := utils.RuneOffsetsToByteOffsets(item)
- for i := range ans.Positions {
- ans.Positions[i] = adjust(ans.Positions[i])
- }
- }
- return ans
- }
- func ScoreItems(query string, items []string, opts Options) []*Match {
- ctx := images.Context{}
- ctx.SetNumberOfThreads(opts.NumberOfThreads)
- ans := make([]*Match, len(items))
- results := make(chan *Match, len(items))
- nr := []rune(strings.ToLower(query))
- if opts.Level1 == "" {
- opts.Level1 = LEVEL1
- }
- if opts.Level2 == "" {
- opts.Level2 = LEVEL2
- }
- if opts.Level3 == "" {
- opts.Level3 = LEVEL3
- }
- ropts := resolved_options_type{
- level1: []rune(opts.Level1), level2: []rune(opts.Level2), level3: []rune(opts.Level3),
- }
- ctx.Parallel(0, len(items), func(nums <-chan int) {
- w := workspace_type{}
- for i := range nums {
- results <- score_item(items[i], i, nr, &ropts, &w)
- }
- })
- close(results)
- for x := range results {
- ans[x.idx] = x
- }
- return ans
- }
|