ropes.nim 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module contains support for a `rope`:idx: data type.
  10. ## Ropes can represent very long strings efficiently; in particular, concatenation
  11. ## is done in O(1) instead of O(n). They are essentially concatenation
  12. ## trees that are only flattened when converting to a native Nim
  13. ## string. The empty string is represented by `nil`. Ropes are immutable and
  14. ## subtrees can be shared without copying.
  15. ## Leaves can be cached for better memory efficiency at the cost of
  16. ## runtime efficiency.
  17. include system/inclrtl
  18. import streams
  19. {.push debugger: off.} # the user does not want to trace a part
  20. # of the standard library!
  21. const
  22. countCacheMisses = false
  23. var
  24. cacheEnabled = false
  25. type
  26. Rope* {.acyclic.} = ref object
  27. ## A rope data type. The empty rope is represented by `nil`.
  28. left, right: Rope
  29. length: int
  30. data: string # not empty if a leaf
  31. # Note that the left and right pointers are not needed for leafs.
  32. # Leaves have relatively high memory overhead (~30 bytes on a 32
  33. # bit machine) and we produce many of them. This is why we cache and
  34. # share leafs across different rope trees.
  35. # To cache them they are inserted in another tree, a splay tree for best
  36. # performance. But for the caching tree we use the leaf's left and right
  37. # pointers.
  38. proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
  39. ## The rope's length.
  40. if a == nil: 0 else: a.length
  41. proc newRope(): Rope = new(result)
  42. proc newRope(data: string): Rope =
  43. new(result)
  44. result.length = len(data)
  45. result.data = data
  46. var
  47. cache {.threadvar.}: Rope # the root of the cache tree
  48. N {.threadvar.}: Rope # dummy rope needed for splay algorithm
  49. when countCacheMisses:
  50. var misses, hits: int
  51. proc splay(s: string, tree: Rope, cmpres: var int): Rope =
  52. var c: int
  53. var t = tree
  54. N.left = nil
  55. N.right = nil # reset to nil
  56. var le = N
  57. var r = N
  58. while true:
  59. c = cmp(s, t.data)
  60. if c < 0:
  61. if (t.left != nil) and (s < t.left.data):
  62. var y = t.left
  63. t.left = y.right
  64. y.right = t
  65. t = y
  66. if t.left == nil: break
  67. r.left = t
  68. r = t
  69. t = t.left
  70. elif c > 0:
  71. if (t.right != nil) and (s > t.right.data):
  72. var y = t.right
  73. t.right = y.left
  74. y.left = t
  75. t = y
  76. if t.right == nil: break
  77. le.right = t
  78. le = t
  79. t = t.right
  80. else:
  81. break
  82. cmpres = c
  83. le.right = t.left
  84. r.left = t.right
  85. t.left = N.right
  86. t.right = N.left
  87. result = t
  88. proc insertInCache(s: string, tree: Rope): Rope =
  89. var t = tree
  90. if t == nil:
  91. result = newRope(s)
  92. when countCacheMisses: inc(misses)
  93. return
  94. var cmp: int
  95. t = splay(s, t, cmp)
  96. if cmp == 0:
  97. # We get here if it's already in the Tree
  98. # Don't add it again
  99. result = t
  100. when countCacheMisses: inc(hits)
  101. else:
  102. when countCacheMisses: inc(misses)
  103. result = newRope(s)
  104. if cmp < 0:
  105. result.left = t.left
  106. result.right = t
  107. t.left = nil
  108. else:
  109. # i > t.item:
  110. result.right = t.right
  111. result.left = t
  112. t.right = nil
  113. proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
  114. ## Converts a string to a rope.
  115. runnableExamples:
  116. let r = rope("I'm a rope")
  117. doAssert $r == "I'm a rope"
  118. if s.len == 0:
  119. result = nil
  120. else:
  121. when nimvm:
  122. # No caching in VM context
  123. result = newRope(s)
  124. else:
  125. if cacheEnabled:
  126. result = insertInCache(s, cache)
  127. cache = result
  128. else:
  129. result = newRope(s)
  130. proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
  131. ## Converts an int to a rope.
  132. runnableExamples:
  133. let r = rope(429)
  134. doAssert $r == "429"
  135. result = rope($i)
  136. proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
  137. ## Converts a float to a rope.
  138. runnableExamples:
  139. let r = rope(4.29)
  140. doAssert $r == "4.29"
  141. result = rope($f)
  142. proc enableCache*() {.rtl, extern: "nro$1".} =
  143. ## Enables the caching of leaves. This reduces the memory footprint at
  144. ## the cost of runtime efficiency.
  145. cacheEnabled = true
  146. proc disableCache*() {.rtl, extern: "nro$1".} =
  147. ## The cache is discarded and disabled. The GC will reuse its used memory.
  148. cache = nil
  149. cacheEnabled = false
  150. proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
  151. ## The concatenation operator for ropes.
  152. runnableExamples:
  153. let r = rope("Hello, ") & rope("Nim!")
  154. doAssert $r == "Hello, Nim!"
  155. if a == nil:
  156. result = b
  157. elif b == nil:
  158. result = a
  159. else:
  160. result = newRope()
  161. result.length = a.length + b.length
  162. result.left = a
  163. result.right = b
  164. proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
  165. ## The concatenation operator for ropes.
  166. runnableExamples:
  167. let r = rope("Hello, ") & "Nim!"
  168. doAssert $r == "Hello, Nim!"
  169. result = a & rope(b)
  170. proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
  171. ## The concatenation operator for ropes.
  172. runnableExamples:
  173. let r = "Hello, " & rope("Nim!")
  174. doAssert $r == "Hello, Nim!"
  175. result = rope(a) & b
  176. proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
  177. ## The concatenation operator for an `openArray` of ropes.
  178. runnableExamples:
  179. let r = &[rope("Hello, "), rope("Nim"), rope("!")]
  180. doAssert $r == "Hello, Nim!"
  181. for item in a: result = result & item
  182. proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
  183. ## Adds `b` to the rope `a`.
  184. runnableExamples:
  185. var r = rope("Hello, ")
  186. r.add(rope("Nim!"))
  187. doAssert $r == "Hello, Nim!"
  188. a = a & b
  189. proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
  190. ## Adds `b` to the rope `a`.
  191. runnableExamples:
  192. var r = rope("Hello, ")
  193. r.add("Nim!")
  194. doAssert $r == "Hello, Nim!"
  195. a = a & b
  196. proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
  197. ## Returns the character at position `i` in the rope `r`. This is quite
  198. ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
  199. runnableExamples:
  200. let r = rope("Hello, Nim!")
  201. doAssert r[0] == 'H'
  202. doAssert r[7] == 'N'
  203. doAssert r[22] == '\0'
  204. var x = r
  205. var j = i
  206. if x == nil or i < 0 or i >= r.len: return
  207. while true:
  208. if x != nil and x.data.len > 0:
  209. # leaf
  210. return x.data[j]
  211. else:
  212. if x.left.length > j:
  213. x = x.left
  214. else:
  215. dec(j, x.left.length)
  216. x = x.right
  217. iterator leaves*(r: Rope): string =
  218. ## Iterates over any leaf string in the rope `r`.
  219. runnableExamples:
  220. let r = rope("Hello") & rope(", Nim!")
  221. let s = ["Hello", ", Nim!"]
  222. var index = 0
  223. for leave in r.leaves:
  224. doAssert leave == s[index]
  225. inc(index)
  226. if r != nil:
  227. var stack = @[r]
  228. while stack.len > 0:
  229. var it = stack.pop
  230. while it.left != nil:
  231. assert(it.right != nil)
  232. stack.add(it.right)
  233. it = it.left
  234. assert(it != nil)
  235. yield it.data
  236. iterator items*(r: Rope): char =
  237. ## Iterates over any character in the rope `r`.
  238. for s in leaves(r):
  239. for c in items(s): yield c
  240. proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
  241. ## Writes a rope to a file.
  242. for s in leaves(r): write(f, s)
  243. proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
  244. ## Writes a rope to a stream.
  245. for rs in leaves(r): write(s, rs)
  246. proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
  247. ## Converts a rope back to a string.
  248. result = newStringOfCap(r.len)
  249. for s in leaves(r): add(result, s)
  250. proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
  251. ## `%` substitution operator for ropes. Does not support the `$identifier`
  252. ## nor `${identifier}` notations.
  253. runnableExamples:
  254. let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
  255. doAssert $r1 == "Nim is a great language"
  256. let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
  257. doAssert $r2 == "Nim is a great language"
  258. let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
  259. doAssert $r3 == "Nim is a great language"
  260. var i = 0
  261. var length = len(frmt)
  262. result = nil
  263. var num = 0
  264. while i < length:
  265. if frmt[i] == '$':
  266. inc(i)
  267. case frmt[i]
  268. of '$':
  269. add(result, "$")
  270. inc(i)
  271. of '#':
  272. inc(i)
  273. add(result, args[num])
  274. inc(num)
  275. of '0'..'9':
  276. var j = 0
  277. while true:
  278. j = j * 10 + ord(frmt[i]) - ord('0')
  279. inc(i)
  280. if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
  281. add(result, args[j-1])
  282. of '{':
  283. inc(i)
  284. var j = 0
  285. while frmt[i] in {'0'..'9'}:
  286. j = j * 10 + ord(frmt[i]) - ord('0')
  287. inc(i)
  288. if frmt[i] == '}': inc(i)
  289. else: raise newException(ValueError, "invalid format string")
  290. add(result, args[j-1])
  291. else: raise newException(ValueError, "invalid format string")
  292. var start = i
  293. while i < length:
  294. if frmt[i] != '$': inc(i)
  295. else: break
  296. if i - 1 >= start:
  297. add(result, substr(frmt, start, i - 1))
  298. proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
  299. ## Shortcut for `add(c, frmt % args)`.
  300. runnableExamples:
  301. var r = rope("Dash: ")
  302. r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
  303. doAssert $r == "Dash: Nim is a great language"
  304. add(c, frmt % args)
  305. when not defined(js) and not defined(nimscript):
  306. const
  307. bufSize = 1024 # 1 KB is reasonable
  308. proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
  309. ## Returns true if the contents of the file `f` equal `r`.
  310. var
  311. buf: array[bufSize, char]
  312. bpos = buf.len
  313. blen = buf.len
  314. for s in leaves(r):
  315. var spos = 0
  316. let slen = s.len
  317. while spos < slen:
  318. if bpos == blen:
  319. # Read more data
  320. bpos = 0
  321. blen = readBuffer(f, addr(buf[0]), buf.len)
  322. if blen == 0: # no more data in file
  323. return false
  324. let n = min(blen - bpos, slen - spos)
  325. # TODO: There's gotta be a better way of comparing here...
  326. if not equalMem(addr(buf[bpos]),
  327. cast[pointer](cast[int](cstring(s)) + spos), n):
  328. return false
  329. spos += n
  330. bpos += n
  331. result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
  332. proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
  333. ## Returns true if the contents of the file `f` equal `r`. If `f` does not
  334. ## exist, false is returned.
  335. var f: File
  336. result = open(f, filename)
  337. if result:
  338. result = equalsFile(r, f)
  339. close(f)
  340. new(N) # init dummy node for splay algorithm
  341. {.pop.}