strtabs.nim 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``strtabs`` module implements an efficient hash table that is a mapping
  10. ## from strings to strings. Supports a case-sensitive, case-insensitive and
  11. ## style-insensitive mode. An efficient string substitution operator ``%``
  12. ## for the string table is also provided.
  13. import
  14. hashes, strutils
  15. when defined(js):
  16. {.pragma: rtlFunc.}
  17. else:
  18. {.pragma: rtlFunc, rtl.}
  19. import os
  20. include "system/inclrtl"
  21. type
  22. StringTableMode* = enum ## describes the tables operation mode
  23. modeCaseSensitive, ## the table is case sensitive
  24. modeCaseInsensitive, ## the table is case insensitive
  25. modeStyleInsensitive ## the table is style insensitive
  26. KeyValuePair = tuple[key, val: string, hasValue: bool]
  27. KeyValuePairSeq = seq[KeyValuePair]
  28. StringTableObj* = object of RootObj
  29. counter: int
  30. data: KeyValuePairSeq
  31. mode: StringTableMode
  32. StringTableRef* = ref StringTableObj ## use this type to declare string tables
  33. proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} =
  34. ## returns the number of keys in `t`.
  35. result = t.counter
  36. iterator pairs*(t: StringTableRef): tuple[key, value: string] =
  37. ## iterates over every (key, value) pair in the table `t`.
  38. for h in 0..high(t.data):
  39. if t.data[h].hasValue:
  40. yield (t.data[h].key, t.data[h].val)
  41. iterator keys*(t: StringTableRef): string =
  42. ## iterates over every key in the table `t`.
  43. for h in 0..high(t.data):
  44. if t.data[h].hasValue:
  45. yield t.data[h].key
  46. iterator values*(t: StringTableRef): string =
  47. ## iterates over every value in the table `t`.
  48. for h in 0..high(t.data):
  49. if t.data[h].hasValue:
  50. yield t.data[h].val
  51. type
  52. FormatFlag* = enum ## flags for the `%` operator
  53. useEnvironment, ## use environment variable if the ``$key``
  54. ## is not found in the table. Does nothing when using `js` target.
  55. useEmpty, ## use the empty string as a default, thus it
  56. ## won't throw an exception if ``$key`` is not
  57. ## in the table
  58. useKey ## do not replace ``$key`` if it is not found
  59. ## in the table (or in the environment)
  60. const
  61. growthFactor = 2
  62. startSize = 64
  63. proc myhash(t: StringTableRef, key: string): Hash =
  64. case t.mode
  65. of modeCaseSensitive: result = hashes.hash(key)
  66. of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  67. of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)
  68. proc myCmp(t: StringTableRef, a, b: string): bool =
  69. case t.mode
  70. of modeCaseSensitive: result = cmp(a, b) == 0
  71. of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  72. of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0
  73. proc mustRehash(length, counter: int): bool =
  74. assert(length > counter)
  75. result = (length * 2 < counter * 3) or (length - counter < 4)
  76. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  77. result = ((5 * h) + 1) and maxHash
  78. proc rawGet(t: StringTableRef, key: string): int =
  79. var h: Hash = myhash(t, key) and high(t.data) # start with real hash value
  80. while t.data[h].hasValue:
  81. if myCmp(t, t.data[h].key, key):
  82. return h
  83. h = nextTry(h, high(t.data))
  84. result = - 1
  85. template get(t: StringTableRef, key: string) =
  86. var index = rawGet(t, key)
  87. if index >= 0: result = t.data[index].val
  88. else:
  89. when compiles($key):
  90. raise newException(KeyError, "key not found: " & $key)
  91. else:
  92. raise newException(KeyError, "key not found")
  93. proc `[]`*(t: StringTableRef, key: string): var string {.
  94. rtlFunc, extern: "nstTake".} =
  95. ## retrieves the location at ``t[key]``. If `key` is not in `t`, the
  96. ## ``KeyError`` exception is raised. One can check with ``hasKey`` whether
  97. ## the key exists.
  98. get(t, key)
  99. proc getOrDefault*(t: StringTableRef; key: string, default: string = ""): string =
  100. var index = rawGet(t, key)
  101. if index >= 0: result = t.data[index].val
  102. else: result = default
  103. proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, extern: "nst$1".} =
  104. ## returns true iff `key` is in the table `t`.
  105. result = rawGet(t, key) >= 0
  106. proc contains*(t: StringTableRef, key: string): bool =
  107. ## alias of `hasKey` for use with the `in` operator.
  108. return hasKey(t, key)
  109. proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) =
  110. var h: Hash = myhash(t, key) and high(data)
  111. while data[h].hasValue:
  112. h = nextTry(h, high(data))
  113. data[h].key = key
  114. data[h].val = val
  115. data[h].hasValue = true
  116. proc enlarge(t: StringTableRef) =
  117. var n: KeyValuePairSeq
  118. newSeq(n, len(t.data) * growthFactor)
  119. for i in countup(0, high(t.data)):
  120. if t.data[i].hasValue: rawInsert(t, n, t.data[i].key, t.data[i].val)
  121. swap(t.data, n)
  122. proc `[]=`*(t: StringTableRef, key, val: string) {.rtlFunc, extern: "nstPut".} =
  123. ## puts a (key, value)-pair into `t`.
  124. var index = rawGet(t, key)
  125. if index >= 0:
  126. t.data[index].val = val
  127. else:
  128. if mustRehash(len(t.data), t.counter): enlarge(t)
  129. rawInsert(t, t.data, key, val)
  130. inc(t.counter)
  131. proc raiseFormatException(s: string) =
  132. var e: ref ValueError
  133. new(e)
  134. e.msg = "format string: key not found: " & s
  135. raise e
  136. proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string =
  137. if hasKey(t, key): return t.getOrDefault(key)
  138. # hm difficult: assume safety in taint mode here. XXX This is dangerous!
  139. when defined(js):
  140. result = ""
  141. else:
  142. if useEnvironment in flags: result = os.getEnv(key).string
  143. else: result = ""
  144. if result.len == 0:
  145. if useKey in flags: result = '$' & key
  146. elif useEmpty notin flags: raiseFormatException(key)
  147. proc newStringTable*(mode: StringTableMode): StringTableRef {.
  148. rtlFunc, extern: "nst$1".} =
  149. ## creates a new string table that is empty.
  150. new(result)
  151. result.mode = mode
  152. result.counter = 0
  153. newSeq(result.data, startSize)
  154. proc clear*(s: StringTableRef, mode: StringTableMode) {.
  155. rtlFunc, extern: "nst$1".} =
  156. ## resets a string table to be empty again.
  157. s.mode = mode
  158. s.counter = 0
  159. s.data.setLen(startSize)
  160. for i in 0..<s.data.len:
  161. s.data[i].hasValue = false
  162. proc newStringTable*(keyValuePairs: varargs[string],
  163. mode: StringTableMode): StringTableRef {.
  164. rtlFunc, extern: "nst$1WithPairs".} =
  165. ## creates a new string table with given key value pairs.
  166. ## Example::
  167. ## var mytab = newStringTable("key1", "val1", "key2", "val2",
  168. ## modeCaseInsensitive)
  169. result = newStringTable(mode)
  170. var i = 0
  171. while i < high(keyValuePairs):
  172. result[keyValuePairs[i]] = keyValuePairs[i + 1]
  173. inc(i, 2)
  174. proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
  175. mode: StringTableMode = modeCaseSensitive): StringTableRef {.
  176. rtlFunc, extern: "nst$1WithTableConstr".} =
  177. ## creates a new string table with given key value pairs.
  178. ## Example::
  179. ## var mytab = newStringTable({"key1": "val1", "key2": "val2"},
  180. ## modeCaseInsensitive)
  181. result = newStringTable(mode)
  182. for key, val in items(keyValuePairs): result[key] = val
  183. proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {.
  184. rtlFunc, extern: "nstFormat".} =
  185. ## The `%` operator for string tables.
  186. const
  187. PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  188. result = ""
  189. var i = 0
  190. while i < len(f):
  191. if f[i] == '$':
  192. case f[i+1]
  193. of '$':
  194. add(result, '$')
  195. inc(i, 2)
  196. of '{':
  197. var j = i + 1
  198. while j < f.len and f[j] != '}': inc(j)
  199. add(result, getValue(t, flags, substr(f, i+2, j-1)))
  200. i = j + 1
  201. of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_':
  202. var j = i + 1
  203. while j < f.len and f[j] in PatternChars: inc(j)
  204. add(result, getValue(t, flags, substr(f, i+1, j-1)))
  205. i = j
  206. else:
  207. add(result, f[i])
  208. inc(i)
  209. else:
  210. add(result, f[i])
  211. inc(i)
  212. proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} =
  213. ## The `$` operator for string tables.
  214. if t.len == 0:
  215. result = "{:}"
  216. else:
  217. result = "{"
  218. for key, val in pairs(t):
  219. if result.len > 1: result.add(", ")
  220. result.add(key)
  221. result.add(": ")
  222. result.add(val)
  223. result.add("}")
  224. when isMainModule:
  225. var x = {"k": "v", "11": "22", "565": "67"}.newStringTable
  226. assert x["k"] == "v"
  227. assert x["11"] == "22"
  228. assert x["565"] == "67"
  229. x["11"] = "23"
  230. assert x["11"] == "23"
  231. x.clear(modeCaseInsensitive)
  232. x["11"] = "22"
  233. assert x["11"] == "22"