strtabs.nim 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## The ``strtabs`` module implements an efficient hash table that is a mapping
  10. ## from strings to strings. Supports a case-sensitive, case-insensitive and
  11. ## style-insensitive mode.
  12. runnableExamples:
  13. var t = newStringTable()
  14. t["name"] = "John"
  15. t["city"] = "Monaco"
  16. doAssert t.len == 2
  17. doAssert t.hasKey "name"
  18. doAssert "name" in t
  19. ## String tables can be created from a table constructor:
  20. runnableExamples:
  21. var t = {"name": "John", "city": "Monaco"}.newStringTable
  22. ## When using the style insensitive mode (``modeStyleInsensitive``),
  23. ## all letters are compared case insensitively within the ASCII range
  24. ## and underscores are ignored.
  25. runnableExamples:
  26. var x = newStringTable(modeStyleInsensitive)
  27. x["first_name"] = "John"
  28. x["LastName"] = "Doe"
  29. doAssert x["firstName"] == "John"
  30. doAssert x["last_name"] == "Doe"
  31. ## An efficient string substitution operator
  32. ## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table
  33. ## is also provided.
  34. runnableExamples:
  35. var t = {"name": "John", "city": "Monaco"}.newStringTable
  36. doAssert "${name} lives in ${city}" % t == "John lives in Monaco"
  37. ## **See also:**
  38. ## * `tables module <tables.html>`_ for general hash tables
  39. ## * `sharedtables module<sharedtables.html>`_ for shared hash table support
  40. ## * `strutils module<strutils.html>`_ for common string functions
  41. ## * `json module<json.html>`_ for table-like structure which allows
  42. ## heterogeneous members
  43. import std/private/since
  44. import
  45. hashes, strutils
  46. when defined(js) or defined(nimscript) or defined(Standalone):
  47. {.pragma: rtlFunc.}
  48. else:
  49. {.pragma: rtlFunc, rtl.}
  50. import os
  51. include "system/inclrtl"
  52. type
  53. StringTableMode* = enum ## Describes the tables operation mode.
  54. modeCaseSensitive, ## the table is case sensitive
  55. modeCaseInsensitive, ## the table is case insensitive
  56. modeStyleInsensitive ## the table is style insensitive
  57. KeyValuePair = tuple[key, val: string, hasValue: bool]
  58. KeyValuePairSeq = seq[KeyValuePair]
  59. StringTableObj* = object of RootObj
  60. counter: int
  61. data: KeyValuePairSeq
  62. mode: StringTableMode
  63. StringTableRef* = ref StringTableObj
  64. FormatFlag* = enum ## Flags for the `%` operator.
  65. useEnvironment, ## Use environment variable if the ``$key``
  66. ## is not found in the table.
  67. ## Does nothing when using `js` target.
  68. useEmpty, ## Use the empty string as a default, thus it
  69. ## won't throw an exception if ``$key`` is not
  70. ## in the table.
  71. useKey ## Do not replace ``$key`` if it is not found
  72. ## in the table (or in the environment).
  73. const
  74. growthFactor = 2
  75. startSize = 64
  76. proc mode*(t: StringTableRef): StringTableMode {.inline.} = t.mode
  77. iterator pairs*(t: StringTableRef): tuple[key, value: string] =
  78. ## Iterates over every `(key, value)` pair in the table `t`.
  79. for h in 0..high(t.data):
  80. if t.data[h].hasValue:
  81. yield (t.data[h].key, t.data[h].val)
  82. iterator keys*(t: StringTableRef): string =
  83. ## Iterates over every key in the table `t`.
  84. for h in 0..high(t.data):
  85. if t.data[h].hasValue:
  86. yield t.data[h].key
  87. iterator values*(t: StringTableRef): string =
  88. ## Iterates over every value in the table `t`.
  89. for h in 0..high(t.data):
  90. if t.data[h].hasValue:
  91. yield t.data[h].val
  92. proc myhash(t: StringTableRef, key: string): Hash =
  93. case t.mode
  94. of modeCaseSensitive: result = hashes.hash(key)
  95. of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  96. of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)
  97. proc myCmp(t: StringTableRef, a, b: string): bool =
  98. case t.mode
  99. of modeCaseSensitive: result = cmp(a, b) == 0
  100. of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  101. of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0
  102. proc mustRehash(length, counter: int): bool =
  103. assert(length > counter)
  104. result = (length * 2 < counter * 3) or (length - counter < 4)
  105. proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  106. result = (h + 1) and maxHash
  107. proc rawGet(t: StringTableRef, key: string): int =
  108. var h: Hash = myhash(t, key) and high(t.data) # start with real hash value
  109. while t.data[h].hasValue:
  110. if myCmp(t, t.data[h].key, key):
  111. return h
  112. h = nextTry(h, high(t.data))
  113. result = - 1
  114. template get(t: StringTableRef, key: string) =
  115. var index = rawGet(t, key)
  116. if index >= 0: result = t.data[index].val
  117. else:
  118. raise newException(KeyError, "key not found: " & key)
  119. proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} =
  120. ## Returns the number of keys in `t`.
  121. result = t.counter
  122. proc `[]`*(t: StringTableRef, key: string): var string {.
  123. rtlFunc, extern: "nstTake".} =
  124. ## Retrieves the location at ``t[key]``.
  125. ##
  126. ## If `key` is not in `t`, the ``KeyError`` exception is raised.
  127. ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_
  128. ## whether the key exists.
  129. ##
  130. ## See also:
  131. ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  132. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  133. ## (key, value) pair in the table
  134. ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  135. ## is in the table
  136. runnableExamples:
  137. var t = {"name": "John", "city": "Monaco"}.newStringTable
  138. doAssert t["name"] == "John"
  139. doAssertRaises(KeyError):
  140. echo t["occupation"]
  141. get(t, key)
  142. proc getOrDefault*(t: StringTableRef; key: string,
  143. default: string = ""): string =
  144. ## Retrieves the location at ``t[key]``.
  145. ##
  146. ## If `key` is not in `t`, the default value is returned (if not specified,
  147. ## it is an empty string (`""`)).
  148. ##
  149. ## See also:
  150. ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  151. ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  152. ## is in the table
  153. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  154. ## (key, value) pair in the table
  155. runnableExamples:
  156. var t = {"name": "John", "city": "Monaco"}.newStringTable
  157. doAssert t.getOrDefault("name") == "John"
  158. doAssert t.getOrDefault("occupation") == ""
  159. doAssert t.getOrDefault("occupation", "teacher") == "teacher"
  160. doAssert t.getOrDefault("name", "Paul") == "John"
  161. var index = rawGet(t, key)
  162. if index >= 0: result = t.data[index].val
  163. else: result = default
  164. proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc,
  165. extern: "nst$1".} =
  166. ## Returns true if `key` is in the table `t`.
  167. ##
  168. ## See also:
  169. ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  170. ## * `contains proc <#contains,StringTableRef,string>`_
  171. runnableExamples:
  172. var t = {"name": "John", "city": "Monaco"}.newStringTable
  173. doAssert t.hasKey("name")
  174. doAssert not t.hasKey("occupation")
  175. result = rawGet(t, key) >= 0
  176. proc contains*(t: StringTableRef, key: string): bool =
  177. ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with
  178. ## the `in` operator.
  179. runnableExamples:
  180. var t = {"name": "John", "city": "Monaco"}.newStringTable
  181. doAssert "name" in t
  182. doAssert "occupation" notin t
  183. return hasKey(t, key)
  184. proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) =
  185. var h: Hash = myhash(t, key) and high(data)
  186. while data[h].hasValue:
  187. h = nextTry(h, high(data))
  188. data[h].key = key
  189. data[h].val = val
  190. data[h].hasValue = true
  191. proc enlarge(t: StringTableRef) =
  192. var n: KeyValuePairSeq
  193. newSeq(n, len(t.data) * growthFactor)
  194. for i in countup(0, high(t.data)):
  195. if t.data[i].hasValue: rawInsert(t, n, move t.data[i].key, move t.data[i].val)
  196. swap(t.data, n)
  197. proc `[]=`*(t: StringTableRef, key, val: string) {.
  198. rtlFunc, extern: "nstPut".} =
  199. ## Inserts a `(key, value)` pair into `t`.
  200. ##
  201. ## See also:
  202. ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  203. ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  204. runnableExamples:
  205. var t = {"name": "John", "city": "Monaco"}.newStringTable
  206. t["occupation"] = "teacher"
  207. doAssert t.hasKey("occupation")
  208. var index = rawGet(t, key)
  209. if index >= 0:
  210. t.data[index].val = val
  211. else:
  212. if mustRehash(len(t.data), t.counter): enlarge(t)
  213. rawInsert(t, t.data, key, val)
  214. inc(t.counter)
  215. proc newStringTable*(mode: StringTableMode): owned(StringTableRef) {.
  216. rtlFunc, extern: "nst$1", noSideEffect.} =
  217. ## Creates a new empty string table.
  218. ##
  219. ## See also:
  220. ## * `newStringTable(keyValuePairs) proc
  221. ## <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_
  222. new(result)
  223. result.mode = mode
  224. result.counter = 0
  225. newSeq(result.data, startSize)
  226. proc newStringTable*(keyValuePairs: varargs[string],
  227. mode: StringTableMode): owned(StringTableRef) {.
  228. rtlFunc, extern: "nst$1WithPairs", noSideEffect.} =
  229. ## Creates a new string table with given `key, value` string pairs.
  230. ##
  231. ## `StringTableMode` must be specified.
  232. runnableExamples:
  233. var mytab = newStringTable("key1", "val1", "key2", "val2",
  234. modeCaseInsensitive)
  235. result = newStringTable(mode)
  236. var i = 0
  237. while i < high(keyValuePairs):
  238. {.noSideEffect.}:
  239. result[keyValuePairs[i]] = keyValuePairs[i + 1]
  240. inc(i, 2)
  241. proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
  242. mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {.
  243. rtlFunc, extern: "nst$1WithTableConstr", noSideEffect.} =
  244. ## Creates a new string table with given `(key, value)` tuple pairs.
  245. ##
  246. ## The default mode is case sensitive.
  247. runnableExamples:
  248. var
  249. mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive)
  250. mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")])
  251. result = newStringTable(mode)
  252. for key, val in items(keyValuePairs):
  253. {.noSideEffect.}:
  254. result[key] = val
  255. proc raiseFormatException(s: string) =
  256. raise newException(ValueError, "format string: key not found: " & s)
  257. proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string =
  258. if hasKey(t, key): return t.getOrDefault(key)
  259. # hm difficult: assume safety in taint mode here. XXX This is dangerous!
  260. when defined(js) or defined(nimscript) or defined(Standalone):
  261. result = ""
  262. else:
  263. if useEnvironment in flags: result = getEnv(key).string
  264. else: result = ""
  265. if result.len == 0:
  266. if useKey in flags: result = '$' & key
  267. elif useEmpty notin flags: raiseFormatException(key)
  268. proc clear*(s: StringTableRef, mode: StringTableMode) {.
  269. rtlFunc, extern: "nst$1".} =
  270. ## Resets a string table to be empty again, perhaps altering the mode.
  271. ##
  272. ## See also:
  273. ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  274. runnableExamples:
  275. var t = {"name": "John", "city": "Monaco"}.newStringTable
  276. clear(t, modeCaseSensitive)
  277. doAssert len(t) == 0
  278. doAssert "name" notin t
  279. doAssert "city" notin t
  280. s.mode = mode
  281. s.counter = 0
  282. s.data.setLen(startSize)
  283. for i in 0..<s.data.len:
  284. s.data[i].hasValue = false
  285. proc clear*(s: StringTableRef) {.since: (1, 1).} =
  286. ## Resets a string table to be empty again without changing the mode.
  287. s.clear(s.mode)
  288. proc del*(t: StringTableRef, key: string) =
  289. ## Removes `key` from `t`.
  290. ##
  291. ## See also:
  292. ## * `clear proc <#clear,StringTableRef,StringTableMode>`_ for resetting a
  293. ## table to be empty
  294. ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  295. ## (key, value) pair in the table
  296. runnableExamples:
  297. var t = {"name": "John", "city": "Monaco"}.newStringTable
  298. t.del("name")
  299. doAssert len(t) == 1
  300. doAssert "name" notin t
  301. doAssert "city" in t
  302. # Impl adapted from `tableimpl.delImplIdx`
  303. var i = rawGet(t, key)
  304. let msk = high(t.data)
  305. if i >= 0:
  306. dec(t.counter)
  307. block outer:
  308. while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
  309. var j = i # The correctness of this depends on (h+1) in nextTry,
  310. var r = j # though may be adaptable to other simple sequences.
  311. t.data[i].hasValue = false # mark current EMPTY
  312. t.data[i].key = ""
  313. t.data[i].val = ""
  314. while true:
  315. i = (i + 1) and msk # increment mod table size
  316. if not t.data[i].hasValue: # end of collision cluster; So all done
  317. break outer
  318. r = t.myhash(t.data[i].key) and msk # "home" location of key@i
  319. if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
  320. break
  321. when defined(js):
  322. t.data[j] = t.data[i]
  323. elif defined(gcDestructors):
  324. t.data[j] = move t.data[i]
  325. else:
  326. shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop
  327. proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} =
  328. ## The `$` operator for string tables. Used internally when calling
  329. ## `echo` on a table.
  330. if t.len == 0:
  331. result = "{:}"
  332. else:
  333. result = "{"
  334. for key, val in pairs(t):
  335. if result.len > 1: result.add(", ")
  336. result.add(key)
  337. result.add(": ")
  338. result.add(val)
  339. result.add("}")
  340. proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {.
  341. rtlFunc, extern: "nstFormat".} =
  342. ## The `%` operator for string tables.
  343. runnableExamples:
  344. var t = {"name": "John", "city": "Monaco"}.newStringTable
  345. doAssert "${name} lives in ${city}" % t == "John lives in Monaco"
  346. const
  347. PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  348. result = ""
  349. var i = 0
  350. while i < len(f):
  351. if f[i] == '$':
  352. case f[i+1]
  353. of '$':
  354. add(result, '$')
  355. inc(i, 2)
  356. of '{':
  357. var j = i + 1
  358. while j < f.len and f[j] != '}': inc(j)
  359. add(result, getValue(t, flags, substr(f, i+2, j-1)))
  360. i = j + 1
  361. of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_':
  362. var j = i + 1
  363. while j < f.len and f[j] in PatternChars: inc(j)
  364. add(result, getValue(t, flags, substr(f, i+1, j-1)))
  365. i = j
  366. else:
  367. add(result, f[i])
  368. inc(i)
  369. else:
  370. add(result, f[i])
  371. inc(i)
  372. when isMainModule:
  373. var x = {"k": "v", "11": "22", "565": "67"}.newStringTable
  374. assert x["k"] == "v"
  375. assert x["11"] == "22"
  376. assert x["565"] == "67"
  377. x["11"] = "23"
  378. assert x["11"] == "23"
  379. x.clear(modeCaseInsensitive)
  380. x["11"] = "22"
  381. assert x["11"] == "22"