ropes.nim 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains support for a `rope`:idx: data type.
  10. ## Ropes can represent very long strings efficiently; especially concatenation
  11. ## is done in O(1) instead of O(n). They are essentially concatenation
  12. ## trees that are only flattened when converting to a native Nim
  13. ## string. The empty string is represented by ``nil``. Ropes are immutable and
  14. ## subtrees can be shared without copying.
  15. ## Leaves can be cached for better memory efficiency at the cost of
  16. ## runtime efficiency.
  17. include "system/inclrtl"
  18. import streams
  19. {.deadCodeElim: on.} # dce option deprecated
  20. {.push debugger:off .} # the user does not want to trace a part
  21. # of the standard library!
  22. const
  23. countCacheMisses = false
  24. var
  25. cacheEnabled = false
  26. type
  27. Rope* = ref RopeObj ## empty rope is represented by nil
  28. RopeObj {.acyclic.} = object
  29. left, right: Rope
  30. length: int
  31. data: string # != nil if a leaf
  32. # Note that the left and right pointers are not needed for leafs.
  33. # Leaves have relatively high memory overhead (~30 bytes on a 32
  34. # bit machine) and we produce many of them. This is why we cache and
  35. # share leafs across different rope trees.
  36. # To cache them they are inserted in another tree, a splay tree for best
  37. # performance. But for the caching tree we use the leaf's left and right
  38. # pointers.
  39. proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
  40. ## the rope's length
  41. if a == nil: result = 0
  42. else: result = a.length
  43. proc newRope(): Rope = new(result)
  44. proc newRope(data: string): Rope =
  45. new(result)
  46. result.length = len(data)
  47. result.data = data
  48. var
  49. cache {.threadvar.}: Rope # the root of the cache tree
  50. N {.threadvar.}: Rope # dummy rope needed for splay algorithm
  51. when countCacheMisses:
  52. var misses, hits: int
  53. proc splay(s: string, tree: Rope, cmpres: var int): Rope =
  54. var c: int
  55. var t = tree
  56. N.left = nil
  57. N.right = nil # reset to nil
  58. var le = N
  59. var r = N
  60. while true:
  61. c = cmp(s, t.data)
  62. if c < 0:
  63. if (t.left != nil) and (s < t.left.data):
  64. var y = t.left
  65. t.left = y.right
  66. y.right = t
  67. t = y
  68. if t.left == nil: break
  69. r.left = t
  70. r = t
  71. t = t.left
  72. elif c > 0:
  73. if (t.right != nil) and (s > t.right.data):
  74. var y = t.right
  75. t.right = y.left
  76. y.left = t
  77. t = y
  78. if t.right == nil: break
  79. le.right = t
  80. le = t
  81. t = t.right
  82. else:
  83. break
  84. cmpres = c
  85. le.right = t.left
  86. r.left = t.right
  87. t.left = N.right
  88. t.right = N.left
  89. result = t
  90. proc insertInCache(s: string, tree: Rope): Rope =
  91. var t = tree
  92. if t == nil:
  93. result = newRope(s)
  94. when countCacheMisses: inc(misses)
  95. return
  96. var cmp: int
  97. t = splay(s, t, cmp)
  98. if cmp == 0:
  99. # We get here if it's already in the Tree
  100. # Don't add it again
  101. result = t
  102. when countCacheMisses: inc(hits)
  103. else:
  104. when countCacheMisses: inc(misses)
  105. result = newRope(s)
  106. if cmp < 0:
  107. result.left = t.left
  108. result.right = t
  109. t.left = nil
  110. else:
  111. # i > t.item:
  112. result.right = t.right
  113. result.left = t
  114. t.right = nil
  115. proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
  116. ## Converts a string to a rope.
  117. if s.len == 0:
  118. result = nil
  119. else:
  120. when nimvm:
  121. # No caching in VM context
  122. result = newRope(s)
  123. else:
  124. if cacheEnabled:
  125. result = insertInCache(s, cache)
  126. cache = result
  127. else:
  128. result = newRope(s)
  129. proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
  130. ## Converts an int to a rope.
  131. result = rope($i)
  132. proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
  133. ## Converts a float to a rope.
  134. result = rope($f)
  135. proc enableCache*() {.rtl, extern: "nro$1".} =
  136. ## Enables the caching of leaves. This reduces the memory footprint at
  137. ## the cost of runtime efficiency.
  138. cacheEnabled = true
  139. proc disableCache*() {.rtl, extern: "nro$1".} =
  140. ## the cache is discarded and disabled. The GC will reuse its used memory.
  141. cache = nil
  142. cacheEnabled = false
  143. proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
  144. ## the concatenation operator for ropes.
  145. if a == nil:
  146. result = b
  147. elif b == nil:
  148. result = a
  149. else:
  150. result = newRope()
  151. result.length = a.length + b.length
  152. result.left = a
  153. result.right = b
  154. proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
  155. ## the concatenation operator for ropes.
  156. result = a & rope(b)
  157. proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
  158. ## the concatenation operator for ropes.
  159. result = rope(a) & b
  160. proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
  161. ## the concatenation operator for an openarray of ropes.
  162. for i in countup(0, high(a)): result = result & a[i]
  163. proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
  164. ## adds `b` to the rope `a`.
  165. a = a & b
  166. proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
  167. ## adds `b` to the rope `a`.
  168. a = a & b
  169. proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
  170. ## returns the character at position `i` in the rope `r`. This is quite
  171. ## expensive! Worst-case: O(n). If ``i >= r.len``, ``\0`` is returned.
  172. var x = r
  173. var j = i
  174. if x == nil: return
  175. while true:
  176. if x != nil and x.data.len > 0:
  177. if j < x.data.len: return x.data[j]
  178. return '\0'
  179. else:
  180. if x.left.length > j:
  181. x = x.left
  182. else:
  183. x = x.right
  184. dec(j, x.len)
  185. iterator leaves*(r: Rope): string =
  186. ## iterates over any leaf string in the rope `r`.
  187. if r != nil:
  188. var stack = @[r]
  189. while stack.len > 0:
  190. var it = stack.pop
  191. while it.left != nil:
  192. assert(it.right != nil)
  193. stack.add(it.right)
  194. it = it.left
  195. assert(it != nil)
  196. yield it.data
  197. iterator items*(r: Rope): char =
  198. ## iterates over any character in the rope `r`.
  199. for s in leaves(r):
  200. for c in items(s): yield c
  201. proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
  202. ## writes a rope to a file.
  203. for s in leaves(r): write(f, s)
  204. proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
  205. ## writes a rope to a stream.
  206. for rs in leaves(r): write(s, rs)
  207. proc `$`*(r: Rope): string {.rtl, extern: "nroToString".}=
  208. ## converts a rope back to a string.
  209. result = newStringOfCap(r.len)
  210. for s in leaves(r): add(result, s)
  211. proc `%`*(frmt: string, args: openArray[Rope]): Rope {.
  212. rtl, extern: "nroFormat".} =
  213. ## `%` substitution operator for ropes. Does not support the ``$identifier``
  214. ## nor ``${identifier}`` notations.
  215. var i = 0
  216. var length = len(frmt)
  217. result = nil
  218. var num = 0
  219. while i < length:
  220. if frmt[i] == '$':
  221. inc(i)
  222. case frmt[i]
  223. of '$':
  224. add(result, "$")
  225. inc(i)
  226. of '#':
  227. inc(i)
  228. add(result, args[num])
  229. inc(num)
  230. of '0'..'9':
  231. var j = 0
  232. while true:
  233. j = j * 10 + ord(frmt[i]) - ord('0')
  234. inc(i)
  235. if frmt[i] notin {'0'..'9'}: break
  236. add(result, args[j-1])
  237. of '{':
  238. inc(i)
  239. var j = 0
  240. while frmt[i] in {'0'..'9'}:
  241. j = j * 10 + ord(frmt[i]) - ord('0')
  242. inc(i)
  243. if frmt[i] == '}': inc(i)
  244. else: raise newException(ValueError, "invalid format string")
  245. add(result, args[j-1])
  246. else: raise newException(ValueError, "invalid format string")
  247. var start = i
  248. while i < length:
  249. if frmt[i] != '$': inc(i)
  250. else: break
  251. if i - 1 >= start:
  252. add(result, substr(frmt, start, i - 1))
  253. proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.
  254. rtl, extern: "nro$1".} =
  255. ## shortcut for ``add(c, frmt % args)``.
  256. add(c, frmt % args)
  257. const
  258. bufSize = 1024 # 1 KB is reasonable
  259. proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
  260. ## returns true if the contents of the file `f` equal `r`.
  261. var
  262. buf: array[bufSize, char]
  263. bpos = buf.len
  264. blen = buf.len
  265. for s in leaves(r):
  266. var spos = 0
  267. let slen = s.len
  268. while spos < slen:
  269. if bpos == blen:
  270. # Read more data
  271. bpos = 0
  272. blen = readBuffer(f, addr(buf[0]), buf.len)
  273. if blen == 0: # no more data in file
  274. result = false
  275. return
  276. let n = min(blen - bpos, slen - spos)
  277. # TODO There's gotta be a better way of comparing here...
  278. if not equalMem(addr(buf[bpos]),
  279. cast[pointer](cast[int](cstring(s))+spos), n):
  280. result = false
  281. return
  282. spos += n
  283. bpos += n
  284. result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
  285. proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
  286. ## returns true if the contents of the file `f` equal `r`. If `f` does not
  287. ## exist, false is returned.
  288. var f: File
  289. result = open(f, filename)
  290. if result:
  291. result = equalsFile(r, f)
  292. close(f)
  293. new(N) # init dummy node for splay algorithm
  294. {.pop.}