ropes.nim 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains support for a `rope`:idx: data type.
  10. ## Ropes can represent very long strings efficiently; in particular, concatenation
  11. ## is done in O(1) instead of O(n). They are essentially concatenation
  12. ## trees that are only flattened when converting to a native Nim
  13. ## string. The empty string is represented by `nil`. Ropes are immutable and
  14. ## subtrees can be shared without copying.
  15. ## Leaves can be cached for better memory efficiency at the cost of
  16. ## runtime efficiency.
  17. include system/inclrtl
  18. import streams
  19. when defined(nimPreviewSlimSystem):
  20. import std/[syncio, formatfloat, assertions]
  21. {.push debugger: off.} # the user does not want to trace a part
  22. # of the standard library!
  23. const
  24. countCacheMisses = false
  25. var
  26. cacheEnabled = false
  27. type
  28. Rope* {.acyclic.} = ref object
  29. ## A rope data type. The empty rope is represented by `nil`.
  30. left, right: Rope
  31. length: int
  32. data: string # not empty if a leaf
  33. # Note that the left and right pointers are not needed for leafs.
  34. # Leaves have relatively high memory overhead (~30 bytes on a 32
  35. # bit machine) and we produce many of them. This is why we cache and
  36. # share leafs across different rope trees.
  37. # To cache them they are inserted in another tree, a splay tree for best
  38. # performance. But for the caching tree we use the leaf's left and right
  39. # pointers.
  40. proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
  41. ## The rope's length.
  42. if a == nil: 0 else: a.length
  43. proc newRope(): Rope = new(result)
  44. proc newRope(data: string): Rope =
  45. new(result)
  46. result.length = len(data)
  47. result.data = data
  48. var
  49. cache {.threadvar.}: Rope # the root of the cache tree
  50. N {.threadvar.}: Rope # dummy rope needed for splay algorithm
  51. when countCacheMisses:
  52. var misses, hits: int
  53. proc splay(s: string, tree: Rope, cmpres: var int): Rope =
  54. var c: int
  55. var t = tree
  56. N.left = nil
  57. N.right = nil # reset to nil
  58. var le = N
  59. var r = N
  60. while true:
  61. c = cmp(s, t.data)
  62. if c < 0:
  63. if (t.left != nil) and (s < t.left.data):
  64. var y = t.left
  65. t.left = y.right
  66. y.right = t
  67. t = y
  68. if t.left == nil: break
  69. r.left = t
  70. r = t
  71. t = t.left
  72. elif c > 0:
  73. if (t.right != nil) and (s > t.right.data):
  74. var y = t.right
  75. t.right = y.left
  76. y.left = t
  77. t = y
  78. if t.right == nil: break
  79. le.right = t
  80. le = t
  81. t = t.right
  82. else:
  83. break
  84. cmpres = c
  85. le.right = t.left
  86. r.left = t.right
  87. t.left = N.right
  88. t.right = N.left
  89. result = t
  90. proc insertInCache(s: string, tree: Rope): Rope =
  91. var t = tree
  92. if t == nil:
  93. result = newRope(s)
  94. when countCacheMisses: inc(misses)
  95. return
  96. var cmp: int
  97. t = splay(s, t, cmp)
  98. if cmp == 0:
  99. # We get here if it's already in the Tree
  100. # Don't add it again
  101. result = t
  102. when countCacheMisses: inc(hits)
  103. else:
  104. when countCacheMisses: inc(misses)
  105. result = newRope(s)
  106. if cmp < 0:
  107. result.left = t.left
  108. result.right = t
  109. t.left = nil
  110. else:
  111. # i > t.item:
  112. result.right = t.right
  113. result.left = t
  114. t.right = nil
  115. proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
  116. ## Converts a string to a rope.
  117. runnableExamples:
  118. let r = rope("I'm a rope")
  119. doAssert $r == "I'm a rope"
  120. if s.len == 0:
  121. result = nil
  122. else:
  123. when nimvm:
  124. # No caching in VM context
  125. result = newRope(s)
  126. else:
  127. if cacheEnabled:
  128. result = insertInCache(s, cache)
  129. cache = result
  130. else:
  131. result = newRope(s)
  132. proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
  133. ## Converts an int to a rope.
  134. runnableExamples:
  135. let r = rope(429)
  136. doAssert $r == "429"
  137. result = rope($i)
  138. proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
  139. ## Converts a float to a rope.
  140. runnableExamples:
  141. let r = rope(4.29)
  142. doAssert $r == "4.29"
  143. result = rope($f)
  144. proc enableCache*() {.rtl, extern: "nro$1".} =
  145. ## Enables the caching of leaves. This reduces the memory footprint at
  146. ## the cost of runtime efficiency.
  147. cacheEnabled = true
  148. proc disableCache*() {.rtl, extern: "nro$1".} =
  149. ## The cache is discarded and disabled. The GC will reuse its used memory.
  150. cache = nil
  151. cacheEnabled = false
  152. proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
  153. ## The concatenation operator for ropes.
  154. runnableExamples:
  155. let r = rope("Hello, ") & rope("Nim!")
  156. doAssert $r == "Hello, Nim!"
  157. if a == nil:
  158. result = b
  159. elif b == nil:
  160. result = a
  161. else:
  162. result = newRope()
  163. result.length = a.length + b.length
  164. result.left = a
  165. result.right = b
  166. proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
  167. ## The concatenation operator for ropes.
  168. runnableExamples:
  169. let r = rope("Hello, ") & "Nim!"
  170. doAssert $r == "Hello, Nim!"
  171. result = a & rope(b)
  172. proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
  173. ## The concatenation operator for ropes.
  174. runnableExamples:
  175. let r = "Hello, " & rope("Nim!")
  176. doAssert $r == "Hello, Nim!"
  177. result = rope(a) & b
  178. proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
  179. ## The concatenation operator for an `openArray` of ropes.
  180. runnableExamples:
  181. let r = &[rope("Hello, "), rope("Nim"), rope("!")]
  182. doAssert $r == "Hello, Nim!"
  183. for item in a: result = result & item
  184. proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
  185. ## Adds `b` to the rope `a`.
  186. runnableExamples:
  187. var r = rope("Hello, ")
  188. r.add(rope("Nim!"))
  189. doAssert $r == "Hello, Nim!"
  190. a = a & b
  191. proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
  192. ## Adds `b` to the rope `a`.
  193. runnableExamples:
  194. var r = rope("Hello, ")
  195. r.add("Nim!")
  196. doAssert $r == "Hello, Nim!"
  197. a = a & b
  198. proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
  199. ## Returns the character at position `i` in the rope `r`. This is quite
  200. ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
  201. runnableExamples:
  202. let r = rope("Hello, Nim!")
  203. doAssert r[0] == 'H'
  204. doAssert r[7] == 'N'
  205. doAssert r[22] == '\0'
  206. var x = r
  207. var j = i
  208. if x == nil or i < 0 or i >= r.len: return
  209. while true:
  210. if x != nil and x.data.len > 0:
  211. # leaf
  212. return x.data[j]
  213. else:
  214. if x.left.length > j:
  215. x = x.left
  216. else:
  217. dec(j, x.left.length)
  218. x = x.right
  219. iterator leaves*(r: Rope): string =
  220. ## Iterates over any leaf string in the rope `r`.
  221. runnableExamples:
  222. let r = rope("Hello") & rope(", Nim!")
  223. let s = ["Hello", ", Nim!"]
  224. var index = 0
  225. for leave in r.leaves:
  226. doAssert leave == s[index]
  227. inc(index)
  228. if r != nil:
  229. var stack = @[r]
  230. while stack.len > 0:
  231. var it = stack.pop
  232. while it.left != nil:
  233. assert(it.right != nil)
  234. stack.add(it.right)
  235. it = it.left
  236. assert(it != nil)
  237. yield it.data
  238. iterator items*(r: Rope): char =
  239. ## Iterates over any character in the rope `r`.
  240. for s in leaves(r):
  241. for c in items(s): yield c
  242. proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
  243. ## Writes a rope to a file.
  244. for s in leaves(r): write(f, s)
  245. proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
  246. ## Writes a rope to a stream.
  247. for rs in leaves(r): write(s, rs)
  248. proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
  249. ## Converts a rope back to a string.
  250. result = newStringOfCap(r.len)
  251. for s in leaves(r): add(result, s)
  252. proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
  253. ## `%` substitution operator for ropes. Does not support the `$identifier`
  254. ## nor `${identifier}` notations.
  255. runnableExamples:
  256. let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
  257. doAssert $r1 == "Nim is a great language"
  258. let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
  259. doAssert $r2 == "Nim is a great language"
  260. let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
  261. doAssert $r3 == "Nim is a great language"
  262. var i = 0
  263. var length = len(frmt)
  264. result = nil
  265. var num = 0
  266. while i < length:
  267. if frmt[i] == '$':
  268. inc(i)
  269. case frmt[i]
  270. of '$':
  271. add(result, "$")
  272. inc(i)
  273. of '#':
  274. inc(i)
  275. add(result, args[num])
  276. inc(num)
  277. of '0'..'9':
  278. var j = 0
  279. while true:
  280. j = j * 10 + ord(frmt[i]) - ord('0')
  281. inc(i)
  282. if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
  283. add(result, args[j-1])
  284. of '{':
  285. inc(i)
  286. var j = 0
  287. while frmt[i] in {'0'..'9'}:
  288. j = j * 10 + ord(frmt[i]) - ord('0')
  289. inc(i)
  290. if frmt[i] == '}': inc(i)
  291. else: raise newException(ValueError, "invalid format string")
  292. add(result, args[j-1])
  293. else: raise newException(ValueError, "invalid format string")
  294. var start = i
  295. while i < length:
  296. if frmt[i] != '$': inc(i)
  297. else: break
  298. if i - 1 >= start:
  299. add(result, substr(frmt, start, i - 1))
  300. proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
  301. ## Shortcut for `add(c, frmt % args)`.
  302. runnableExamples:
  303. var r = rope("Dash: ")
  304. r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
  305. doAssert $r == "Dash: Nim is a great language"
  306. add(c, frmt % args)
  307. when not defined(js) and not defined(nimscript):
  308. const
  309. bufSize = 1024 # 1 KB is reasonable
  310. proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
  311. ## Returns true if the contents of the file `f` equal `r`.
  312. var
  313. buf: array[bufSize, char]
  314. bpos = buf.len
  315. blen = buf.len
  316. for s in leaves(r):
  317. var spos = 0
  318. let slen = s.len
  319. while spos < slen:
  320. if bpos == blen:
  321. # Read more data
  322. bpos = 0
  323. blen = readBuffer(f, addr(buf[0]), buf.len)
  324. if blen == 0: # no more data in file
  325. return false
  326. let n = min(blen - bpos, slen - spos)
  327. # TODO: There's gotta be a better way of comparing here...
  328. if not equalMem(addr(buf[bpos]),
  329. cast[pointer](cast[int](cstring(s)) + spos), n):
  330. return false
  331. spos += n
  332. bpos += n
  333. result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
  334. proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
  335. ## Returns true if the contents of the file `f` equal `r`. If `f` does not
  336. ## exist, false is returned.
  337. var f: File
  338. result = open(f, filename)
  339. if result:
  340. result = equalsFile(r, f)
  341. close(f)
  342. new(N) # init dummy node for splay algorithm
  343. {.pop.}