fuzzysearch.nim 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. # A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
  2. # as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
  3. # Heavily modified to provide more subjectively useful results
  4. # for on the Nim manual.
  5. #
  6. import strutils
  7. import math
  8. import macros
  9. const
  10. MaxUnmatchedLeadingChar = 3
  11. ## Maximum number of times the penalty for unmatched leading chars is applied.
  12. HeadingScaleFactor = 0.5
  13. ## The score from before the colon Char is multiplied by this.
  14. ## This is to weight function signatures and descriptions over module titles.
  15. type
  16. ScoreCard = enum
  17. StartMatch = -100 ## Start matching.
  18. LeadingCharDiff = -3 ## An unmatched, leading character was found.
  19. CharDiff = -1 ## An unmatched character was found.
  20. CharMatch = 0 ## A matched character was found.
  21. ConsecutiveMatch = 5 ## A consecutive match was found.
  22. LeadingCharMatch = 10 ## The character matches the begining of the
  23. ## string or the first character of a word
  24. ## or camel case boundry.
  25. WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that
  26. ## immediately precedes the end of the string,
  27. ## end of the pattern, or a LeadingCharMatch.
  28. proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
  29. var
  30. scoreState = StartMatch
  31. headerMatched = false
  32. unmatchedLeadingCharCount = 0
  33. consecutiveMatchCount = 0
  34. strIndex = 0
  35. patIndex = 0
  36. score = 0
  37. template transition(nextState) =
  38. scoreState = nextState
  39. score += ord(scoreState)
  40. while (strIndex < str.len) and (patIndex < pattern.len):
  41. var
  42. patternChar = pattern[patIndex].toLowerAscii
  43. strChar = str[strIndex].toLowerAscii
  44. # Ignore certain characters
  45. if patternChar in {'_', ' ', '.'}:
  46. patIndex += 1
  47. continue
  48. if strChar in {'_', ' ', '.'}:
  49. strIndex += 1
  50. continue
  51. # Since this algorithm will be used to search against Nim documentation,
  52. # the below logic prioritizes headers.
  53. if not headerMatched and strChar == ':':
  54. headerMatched = true
  55. scoreState = StartMatch
  56. score = toInt(floor(HeadingScaleFactor * toFloat(score)))
  57. patIndex = 0
  58. strIndex += 1
  59. continue
  60. if strChar == patternChar:
  61. case scoreState
  62. of StartMatch, WordBoundryMatch:
  63. scoreState = LeadingCharMatch
  64. of CharMatch:
  65. transition(ConsecutiveMatch)
  66. of LeadingCharMatch, ConsecutiveMatch:
  67. consecutiveMatchCount += 1
  68. scoreState = ConsecutiveMatch
  69. score += ord(ConsecutiveMatch) * consecutiveMatchCount
  70. if scoreState == LeadingCharMatch:
  71. score += ord(LeadingCharMatch)
  72. var onBoundary = (patIndex == high(pattern))
  73. if not onBoundary and strIndex < high(str):
  74. let
  75. nextPatternChar = toLowerAscii(pattern[patIndex + 1])
  76. nextStrChar = toLowerAscii(str[strIndex + 1])
  77. onBoundary = (
  78. nextStrChar notin {'a'..'z'} and
  79. nextStrChar != nextPatternChar
  80. )
  81. if onBoundary:
  82. transition(WordBoundryMatch)
  83. of CharDiff, LeadingCharDiff:
  84. var isLeadingChar = (
  85. str[strIndex - 1] notin Letters or
  86. str[strIndex - 1] in {'a'..'z'} and
  87. str[strIndex] in {'A'..'Z'}
  88. )
  89. if isLeadingChar:
  90. scoreState = LeadingCharMatch
  91. #a non alpha or a camel case transition counts as a leading char.
  92. # Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
  93. else:
  94. transition(CharMatch)
  95. patIndex += 1
  96. else:
  97. case scoreState
  98. of StartMatch:
  99. transition(LeadingCharDiff)
  100. of ConsecutiveMatch:
  101. transition(CharDiff)
  102. consecutiveMatchCount = 0
  103. of LeadingCharDiff:
  104. if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
  105. transition(LeadingCharDiff)
  106. unmatchedLeadingCharCount += 1
  107. else:
  108. transition(CharDiff)
  109. strIndex += 1
  110. result = (
  111. score: max(0, score),
  112. matched: (score > 0),
  113. )