stringcases.nim 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2023 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## included from ast2ir.nim
  10. #[
  11. case s
  12. of "abc", "abbd":
  13. echo 1
  14. of "hah":
  15. echo 2
  16. of "gah":
  17. echo 3
  18. # we produce code like this:
  19. if s[0] <= 'a':
  20. if s == "abc: goto L1
  21. elif s == "abbd": goto L1
  22. else:
  23. if s[2] <= 'h':
  24. if s == "hah": goto L2
  25. elif s == "gah": goto L3
  26. goto afterCase
  27. L1:
  28. echo 1
  29. goto afterCase
  30. L2:
  31. echo 2
  32. goto afterCase
  33. L3:
  34. echo 3
  35. goto afterCase
  36. afterCase: ...
  37. ]#
  38. # We split the set of strings into 2 sets of roughly the same size.
  39. # The condition used for splitting is a (position, char) tuple.
  40. # Every string of length > position for which s[position] <= char is in one
  41. # set else it is in the other set.
  42. from std/sequtils import addUnique
  43. type
  44. Key = (LitId, LabelId)
  45. proc splitValue(strings: BiTable[string]; a: openArray[Key]; position: int): (char, float) =
  46. var cand: seq[char] = @[]
  47. for t in items a:
  48. let s = strings[t[0]]
  49. if s.len > position: cand.addUnique s[position]
  50. result = ('\0', -1.0)
  51. for disc in items cand:
  52. var hits = 0
  53. for t in items a:
  54. let s = strings[t[0]]
  55. if s.len > position and s[position] <= disc:
  56. inc hits
  57. # the split is the better, the more `hits` is close to `a.len / 2`:
  58. let grade = 100000.0 - abs(hits.float - a.len.float / 2.0)
  59. if grade > result[1]:
  60. result = (disc, grade)
  61. proc tryAllPositions(strings: BiTable[string]; a: openArray[Key]): (char, int) =
  62. var m = 0
  63. for t in items a:
  64. m = max(m, strings[t[0]].len)
  65. result = ('\0', -1)
  66. var best = -1.0
  67. for i in 0 ..< m:
  68. let current = splitValue(strings, a, i)
  69. if current[1] > best:
  70. best = current[1]
  71. result = (current[0], i)
  72. type
  73. SearchKind = enum
  74. LinearSearch, SplitSearch
  75. SearchResult* = object
  76. case kind: SearchKind
  77. of LinearSearch:
  78. a: seq[Key]
  79. of SplitSearch:
  80. span: int
  81. best: (char, int)
  82. proc emitLinearSearch(strings: BiTable[string]; a: openArray[Key]; dest: var seq[SearchResult]) =
  83. var d = SearchResult(kind: LinearSearch, a: @[])
  84. for x in a: d.a.add x
  85. dest.add d
  86. proc split(strings: BiTable[string]; a: openArray[Key]; dest: var seq[SearchResult]) =
  87. if a.len <= 4:
  88. emitLinearSearch strings, a, dest
  89. else:
  90. let best = tryAllPositions(strings, a)
  91. var groupA: seq[Key] = @[]
  92. var groupB: seq[Key] = @[]
  93. for t in items a:
  94. let s = strings[t[0]]
  95. if s.len > best[1] and s[best[1]] <= best[0]:
  96. groupA.add t
  97. else:
  98. groupB.add t
  99. if groupA.len == 0 or groupB.len == 0:
  100. emitLinearSearch strings, a, dest
  101. else:
  102. let toPatch = dest.len
  103. dest.add SearchResult(kind: SplitSearch, span: 1, best: best)
  104. split strings, groupA, dest
  105. split strings, groupB, dest
  106. let dist = dest.len - toPatch
  107. assert dist > 0
  108. dest[toPatch].span = dist
  109. proc toProblemDescription(c: var ProcCon; n: PNode): (seq[Key], LabelId) =
  110. result = (@[], newLabels(c.labelGen, n.len))
  111. assert n.kind == nkCaseStmt
  112. for i in 1..<n.len:
  113. let it = n[i]
  114. let thisBranch = LabelId(result[1].int + i - 1)
  115. if it.kind == nkOfBranch:
  116. for j in 0..<it.len-1:
  117. assert it[j].kind in {nkStrLit..nkTripleStrLit}
  118. result[0].add (c.lit.strings.getOrIncl(it[j].strVal), thisBranch)
  119. proc decodeSolution(c: var ProcCon; dest: var Tree; s: seq[SearchResult]; i: int;
  120. selector: Value; info: PackedLineInfo) =
  121. case s[i].kind
  122. of SplitSearch:
  123. let thenA = i+1
  124. let elseA = thenA + (if s[thenA].kind == LinearSearch: 1 else: s[thenA].span)
  125. let best = s[i].best
  126. let tmp = getTemp(c, Bool8Id, info)
  127. buildTyped dest, info, Asgn, Bool8Id:
  128. dest.copyTree tmp
  129. buildTyped dest, info, Call, Bool8Id:
  130. c.addUseCodegenProc dest, "nimStrAtLe", info
  131. dest.copyTree selector
  132. dest.addIntVal c.lit.numbers, info, c.m.nativeIntId, best[1]
  133. dest.addIntVal c.lit.numbers, info, Char8Id, best[0].int
  134. template then() =
  135. c.decodeSolution dest, s, thenA, selector, info
  136. template otherwise() =
  137. c.decodeSolution dest, s, elseA, selector, info
  138. buildIfThenElse tmp, then, otherwise
  139. freeTemp c, tmp
  140. of LinearSearch:
  141. let tmp = getTemp(c, Bool8Id, info)
  142. for x in s[i].a:
  143. buildTyped dest, info, Asgn, Bool8Id:
  144. dest.copyTree tmp
  145. buildTyped dest, info, Call, Bool8Id:
  146. c.addUseCodegenProc dest, "eqStrings", info
  147. dest.copyTree selector
  148. dest.addStrLit info, x[0]
  149. buildIf tmp:
  150. c.code.gotoLabel info, Goto, x[1]
  151. freeTemp c, tmp
  152. proc genStringCase(c: var ProcCon; n: PNode; d: var Value) =
  153. let (problem, firstBranch) = toProblemDescription(c, n)
  154. var solution: seq[SearchResult] = @[]
  155. split c.lit.strings, problem, solution
  156. # XXX Todo move complex case selector into a temporary.
  157. let selector = c.genx(n[0])
  158. let info = toLineInfo(c, n.info)
  159. decodeSolution c, c.code, solution, 0, selector, info
  160. let lend = newLabel(c.labelGen)
  161. c.code.addLabel info, Goto, lend
  162. for i in 1..<n.len:
  163. let it = n[i]
  164. let thisBranch = LabelId(firstBranch.int + i - 1)
  165. c.code.addLabel info, Label, thisBranch
  166. if it.kind == nkOfBranch:
  167. gen(c, it.lastSon, d)
  168. c.code.addLabel info, Goto, lend
  169. else:
  170. gen(c, it.lastSon, d)
  171. c.code.addLabel info, Label, lend
  172. freeTemp c, selector