strtabs.nim 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``strtabs`` module implements an efficient hash table that is a mapping
  10. ## from strings to strings. Supports a case-sensitive, case-insensitive and
  11. ## style-insensitive mode. An efficient string substitution operator ``%``
  12. ## for the string table is also provided.
  13. import
  14. hashes, strutils
  15. when defined(js):
  16. {.pragma: rtlFunc.}
  17. {.pragma: deprecatedGetFunc.}
  18. else:
  19. {.pragma: deprecatedGetFunc, deprecatedGet.}
  20. {.pragma: rtlFunc, rtl.}
  21. import os
  22. include "system/inclrtl"
  23. type
  24. StringTableMode* = enum ## describes the tables operation mode
  25. modeCaseSensitive, ## the table is case sensitive
  26. modeCaseInsensitive, ## the table is case insensitive
  27. modeStyleInsensitive ## the table is style insensitive
  28. KeyValuePair = tuple[key, val: string]
  29. KeyValuePairSeq = seq[KeyValuePair]
  30. StringTableObj* = object of RootObj
  31. counter: int
  32. data: KeyValuePairSeq
  33. mode: StringTableMode
  34. StringTableRef* = ref StringTableObj ## use this type to declare string tables
  35. {.deprecated: [TStringTableMode: StringTableMode,
  36. TStringTable: StringTableObj, PStringTable: StringTableRef].}
  37. proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} =
  38. ## returns the number of keys in `t`.
  39. result = t.counter
  40. iterator pairs*(t: StringTableRef): tuple[key, value: string] =
  41. ## iterates over every (key, value) pair in the table `t`.
  42. for h in 0..high(t.data):
  43. if not isNil(t.data[h].key):
  44. yield (t.data[h].key, t.data[h].val)
  45. iterator keys*(t: StringTableRef): string =
  46. ## iterates over every key in the table `t`.
  47. for h in 0..high(t.data):
  48. if not isNil(t.data[h].key):
  49. yield t.data[h].key
  50. iterator values*(t: StringTableRef): string =
  51. ## iterates over every value in the table `t`.
  52. for h in 0..high(t.data):
  53. if not isNil(t.data[h].key):
  54. yield t.data[h].val
  55. type
  56. FormatFlag* = enum ## flags for the `%` operator
  57. useEnvironment, ## use environment variable if the ``$key``
  58. ## is not found in the table. Does nothing when using `js` target.
  59. useEmpty, ## use the empty string as a default, thus it
  60. ## won't throw an exception if ``$key`` is not
  61. ## in the table
  62. useKey ## do not replace ``$key`` if it is not found
  63. ## in the table (or in the environment)
  64. {.deprecated: [TFormatFlag: FormatFlag].}
  65. # implementation
  66. const
  67. growthFactor = 2
  68. startSize = 64
  69. proc myhash(t: StringTableRef, key: string): Hash =
  70. case t.mode
  71. of modeCaseSensitive: result = hashes.hash(key)
  72. of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  73. of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)
  74. proc myCmp(t: StringTableRef, a, b: string): bool =
  75. case t.mode
  76. of modeCaseSensitive: result = cmp(a, b) == 0
  77. of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  78. of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0
  79. proc mustRehash(length, counter: int): bool =
  80. assert(length > counter)
  81. result = (length * 2 < counter * 3) or (length - counter < 4)
  82. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  83. result = ((5 * h) + 1) and maxHash
  84. proc rawGet(t: StringTableRef, key: string): int =
  85. var h: Hash = myhash(t, key) and high(t.data) # start with real hash value
  86. while not isNil(t.data[h].key):
  87. if myCmp(t, t.data[h].key, key):
  88. return h
  89. h = nextTry(h, high(t.data))
  90. result = - 1
  91. template get(t: StringTableRef, key: string) =
  92. var index = rawGet(t, key)
  93. if index >= 0: result = t.data[index].val
  94. else:
  95. when compiles($key):
  96. raise newException(KeyError, "key not found: " & $key)
  97. else:
  98. raise newException(KeyError, "key not found")
  99. proc `[]`*(t: StringTableRef, key: string): var string {.
  100. rtlFunc, extern: "nstTake", deprecatedGetFunc.} =
  101. ## retrieves the location at ``t[key]``. If `key` is not in `t`, the
  102. ## ``KeyError`` exception is raised. One can check with ``hasKey`` whether
  103. ## the key exists.
  104. get(t, key)
  105. proc mget*(t: StringTableRef, key: string): var string {.deprecated.} =
  106. ## retrieves the location at ``t[key]``. If `key` is not in `t`, the
  107. ## ``KeyError`` exception is raised. Use ```[]``` instead.
  108. get(t, key)
  109. proc getOrDefault*(t: StringTableRef; key: string, default: string = ""): string =
  110. var index = rawGet(t, key)
  111. if index >= 0: result = t.data[index].val
  112. else: result = default
  113. proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, extern: "nst$1".} =
  114. ## returns true iff `key` is in the table `t`.
  115. result = rawGet(t, key) >= 0
  116. proc contains*(t: StringTableRef, key: string): bool =
  117. ## alias of `hasKey` for use with the `in` operator.
  118. return hasKey(t, key)
  119. proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) =
  120. var h: Hash = myhash(t, key) and high(data)
  121. while not isNil(data[h].key):
  122. h = nextTry(h, high(data))
  123. data[h].key = key
  124. data[h].val = val
  125. proc enlarge(t: StringTableRef) =
  126. var n: KeyValuePairSeq
  127. newSeq(n, len(t.data) * growthFactor)
  128. for i in countup(0, high(t.data)):
  129. if not isNil(t.data[i].key): rawInsert(t, n, t.data[i].key, t.data[i].val)
  130. swap(t.data, n)
  131. proc `[]=`*(t: StringTableRef, key, val: string) {.rtlFunc, extern: "nstPut".} =
  132. ## puts a (key, value)-pair into `t`.
  133. var index = rawGet(t, key)
  134. if index >= 0:
  135. t.data[index].val = val
  136. else:
  137. if mustRehash(len(t.data), t.counter): enlarge(t)
  138. rawInsert(t, t.data, key, val)
  139. inc(t.counter)
  140. proc raiseFormatException(s: string) =
  141. var e: ref ValueError
  142. new(e)
  143. e.msg = "format string: key not found: " & s
  144. raise e
  145. proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string =
  146. if hasKey(t, key): return t.getOrDefault(key)
  147. # hm difficult: assume safety in taint mode here. XXX This is dangerous!
  148. when defined(js):
  149. result = ""
  150. else:
  151. if useEnvironment in flags: result = os.getEnv(key).string
  152. else: result = ""
  153. if result.len == 0:
  154. if useKey in flags: result = '$' & key
  155. elif useEmpty notin flags: raiseFormatException(key)
  156. proc newStringTable*(mode: StringTableMode): StringTableRef {.
  157. rtlFunc, extern: "nst$1".} =
  158. ## creates a new string table that is empty.
  159. new(result)
  160. result.mode = mode
  161. result.counter = 0
  162. newSeq(result.data, startSize)
  163. proc clear*(s: StringTableRef, mode: StringTableMode) =
  164. ## resets a string table to be empty again.
  165. s.mode = mode
  166. s.counter = 0
  167. s.data.setLen(startSize)
  168. for i in 0..<s.data.len:
  169. if not isNil(s.data[i].key):
  170. s.data[i].key = nil
  171. proc newStringTable*(keyValuePairs: varargs[string],
  172. mode: StringTableMode): StringTableRef {.
  173. rtlFunc, extern: "nst$1WithPairs".} =
  174. ## creates a new string table with given key value pairs.
  175. ## Example::
  176. ## var mytab = newStringTable("key1", "val1", "key2", "val2",
  177. ## modeCaseInsensitive)
  178. result = newStringTable(mode)
  179. var i = 0
  180. while i < high(keyValuePairs):
  181. result[keyValuePairs[i]] = keyValuePairs[i + 1]
  182. inc(i, 2)
  183. proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
  184. mode: StringTableMode = modeCaseSensitive): StringTableRef {.
  185. rtlFunc, extern: "nst$1WithTableConstr".} =
  186. ## creates a new string table with given key value pairs.
  187. ## Example::
  188. ## var mytab = newStringTable({"key1": "val1", "key2": "val2"},
  189. ## modeCaseInsensitive)
  190. result = newStringTable(mode)
  191. for key, val in items(keyValuePairs): result[key] = val
  192. proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {.
  193. rtlFunc, extern: "nstFormat".} =
  194. ## The `%` operator for string tables.
  195. const
  196. PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  197. result = ""
  198. var i = 0
  199. while i < len(f):
  200. if f[i] == '$':
  201. case f[i+1]
  202. of '$':
  203. add(result, '$')
  204. inc(i, 2)
  205. of '{':
  206. var j = i + 1
  207. while j < f.len and f[j] != '}': inc(j)
  208. add(result, getValue(t, flags, substr(f, i+2, j-1)))
  209. i = j + 1
  210. of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_':
  211. var j = i + 1
  212. while j < f.len and f[j] in PatternChars: inc(j)
  213. add(result, getValue(t, flags, substr(f, i+1, j-1)))
  214. i = j
  215. else:
  216. add(result, f[i])
  217. inc(i)
  218. else:
  219. add(result, f[i])
  220. inc(i)
  221. proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} =
  222. ## The `$` operator for string tables.
  223. if t.len == 0:
  224. result = "{:}"
  225. else:
  226. result = "{"
  227. for key, val in pairs(t):
  228. if result.len > 1: result.add(", ")
  229. result.add(key)
  230. result.add(": ")
  231. result.add(val)
  232. result.add("}")
  233. when isMainModule:
  234. var x = {"k": "v", "11": "22", "565": "67"}.newStringTable
  235. assert x["k"] == "v"
  236. assert x["11"] == "22"
  237. assert x["565"] == "67"
  238. x["11"] = "23"
  239. assert x["11"] == "23"
  240. x.clear(modeCaseInsensitive)
  241. x["11"] = "22"
  242. assert x["11"] == "22"