ropes.nim 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. # Ropes for the C code generator
  10. #
  11. # Ropes are a data structure that represents a very long string
  12. # efficiently; especially concatenation is done in O(1) instead of O(N).
  13. # Ropes make use a lazy evaluation: They are essentially concatenation
  14. # trees that are only flattened when converting to a native Nim
  15. # string or when written to disk. The empty string is represented by a
  16. # nil pointer.
  17. # A little picture makes everything clear:
  18. #
  19. # "this string" & " is internally " & "represented as"
  20. #
  21. # con -- inner nodes do not contain raw data
  22. # / \
  23. # / \
  24. # / \
  25. # con "represented as"
  26. # / \
  27. # / \
  28. # / \
  29. # / \
  30. # / \
  31. #"this string" " is internally "
  32. #
  33. # Note that this is the same as:
  34. # "this string" & (" is internally " & "represented as")
  35. #
  36. # con
  37. # / \
  38. # / \
  39. # / \
  40. # "this string" con
  41. # / \
  42. # / \
  43. # / \
  44. # / \
  45. # / \
  46. #" is internally " "represented as"
  47. #
  48. # The 'con' operator is associative! This does not matter however for
  49. # the algorithms we use for ropes.
  50. #
  51. # Note that the left and right pointers are not needed for leaves.
  52. # Leaves have relatively high memory overhead (~30 bytes on a 32
  53. # bit machines) and we produce many of them. This is why we cache and
  54. # share leaves across different rope trees.
  55. # To cache them they are inserted in a `cache` array.
  56. import
  57. platform, hashes
  58. type
  59. FormatStr* = string # later we may change it to CString for better
  60. # performance of the code generator (assignments
  61. # copy the format strings
  62. # though it is not necessary)
  63. Rope* = ref RopeObj
  64. RopeObj*{.acyclic.} = object of RootObj # the empty rope is represented
  65. # by nil to safe space
  66. left*, right*: Rope
  67. length*: int
  68. data*: string # != nil if a leaf
  69. RopeSeq* = seq[Rope]
  70. RopesError* = enum
  71. rCannotOpenFile
  72. rInvalidFormatStr
  73. # implementation
  74. var errorHandler*: proc(err: RopesError, msg: string, useWarning = false)
  75. # avoid dependency on msgs.nim
  76. proc len*(a: Rope): int =
  77. ## the rope's length
  78. if a == nil: result = 0
  79. else: result = a.length
  80. proc newRope(data: string = nil): Rope =
  81. new(result)
  82. if data != nil:
  83. result.length = len(data)
  84. result.data = data
  85. proc newMutableRope*(capacity = 30): Rope =
  86. ## creates a new rope that supports direct modifications of the rope's
  87. ## 'data' and 'length' fields.
  88. new(result)
  89. result.data = newStringOfCap(capacity)
  90. proc freezeMutableRope*(r: Rope) {.inline.} =
  91. r.length = r.data.len
  92. var
  93. cache: array[0..2048*2 - 1, Rope]
  94. proc resetRopeCache* =
  95. for i in low(cache)..high(cache):
  96. cache[i] = nil
  97. proc ropeInvariant(r: Rope): bool =
  98. if r == nil:
  99. result = true
  100. else:
  101. result = true #
  102. # if r.data <> snil then
  103. # result := true
  104. # else begin
  105. # result := (r.left <> nil) and (r.right <> nil);
  106. # if result then result := ropeInvariant(r.left);
  107. # if result then result := ropeInvariant(r.right);
  108. # end
  109. var gCacheTries* = 0
  110. var gCacheMisses* = 0
  111. var gCacheIntTries* = 0
  112. proc insertInCache(s: string): Rope =
  113. inc gCacheTries
  114. var h = hash(s) and high(cache)
  115. result = cache[h]
  116. if isNil(result) or result.data != s:
  117. inc gCacheMisses
  118. result = newRope(s)
  119. cache[h] = result
  120. proc rope*(s: string): Rope =
  121. ## Converts a string to a rope.
  122. if s.len == 0:
  123. result = nil
  124. else:
  125. result = insertInCache(s)
  126. assert(ropeInvariant(result))
  127. proc rope*(i: BiggestInt): Rope =
  128. ## Converts an int to a rope.
  129. inc gCacheIntTries
  130. result = rope($i)
  131. proc rope*(f: BiggestFloat): Rope =
  132. ## Converts a float to a rope.
  133. result = rope($f)
  134. proc `&`*(a, b: Rope): Rope =
  135. if a == nil:
  136. result = b
  137. elif b == nil:
  138. result = a
  139. else:
  140. result = newRope()
  141. result.length = a.length + b.length
  142. result.left = a
  143. result.right = b
  144. proc `&`*(a: Rope, b: string): Rope =
  145. ## the concatenation operator for ropes.
  146. result = a & rope(b)
  147. proc `&`*(a: string, b: Rope): Rope =
  148. ## the concatenation operator for ropes.
  149. result = rope(a) & b
  150. proc `&`*(a: openArray[Rope]): Rope =
  151. ## the concatenation operator for an openarray of ropes.
  152. for i in countup(0, high(a)): result = result & a[i]
  153. proc add*(a: var Rope, b: Rope) =
  154. ## adds `b` to the rope `a`.
  155. a = a & b
  156. proc add*(a: var Rope, b: string) =
  157. ## adds `b` to the rope `a`.
  158. a = a & b
  159. iterator leaves*(r: Rope): string =
  160. ## iterates over any leaf string in the rope `r`.
  161. if r != nil:
  162. var stack = @[r]
  163. while stack.len > 0:
  164. var it = stack.pop
  165. while isNil(it.data):
  166. stack.add(it.right)
  167. it = it.left
  168. assert(it != nil)
  169. assert(it.data != nil)
  170. yield it.data
  171. iterator items*(r: Rope): char =
  172. ## iterates over any character in the rope `r`.
  173. for s in leaves(r):
  174. for c in items(s): yield c
  175. proc writeRope*(f: File, r: Rope) =
  176. ## writes a rope to a file.
  177. for s in leaves(r): write(f, s)
  178. proc writeRope*(head: Rope, filename: string, useWarning = false) =
  179. var f: File
  180. if open(f, filename, fmWrite):
  181. if head != nil: writeRope(f, head)
  182. close(f)
  183. else:
  184. errorHandler(rCannotOpenFile, filename, useWarning)
  185. proc `$`*(r: Rope): string =
  186. ## converts a rope back to a string.
  187. result = newString(r.len)
  188. setLen(result, 0)
  189. for s in leaves(r): add(result, s)
  190. proc ropeConcat*(a: varargs[Rope]): Rope =
  191. # not overloaded version of concat to speed-up `rfmt` a little bit
  192. for i in countup(0, high(a)): result = result & a[i]
  193. proc prepend*(a: var Rope, b: Rope) = a = b & a
  194. proc prepend*(a: var Rope, b: string) = a = b & a
  195. var
  196. rnl* = tnl.newRope
  197. softRnl* = tnl.newRope
  198. noRnl* = "".newRope
  199. proc `%`*(frmt: FormatStr, args: openArray[Rope]): Rope =
  200. var i = 0
  201. var length = len(frmt)
  202. result = nil
  203. var num = 0
  204. while i < length:
  205. if frmt[i] == '$':
  206. inc(i) # skip '$'
  207. case frmt[i]
  208. of '$':
  209. add(result, "$")
  210. inc(i)
  211. of '#':
  212. inc(i)
  213. add(result, args[num])
  214. inc(num)
  215. of '0'..'9':
  216. var j = 0
  217. while true:
  218. j = j * 10 + ord(frmt[i]) - ord('0')
  219. inc(i)
  220. if frmt[i] notin {'0'..'9'}: break
  221. num = j
  222. if j > high(args) + 1:
  223. errorHandler(rInvalidFormatStr, $(j))
  224. else:
  225. add(result, args[j-1])
  226. of '{':
  227. inc(i)
  228. var j = 0
  229. while frmt[i] in {'0'..'9'}:
  230. j = j * 10 + ord(frmt[i]) - ord('0')
  231. inc(i)
  232. num = j
  233. if frmt[i] == '}': inc(i)
  234. else: errorHandler(rInvalidFormatStr, $(frmt[i]))
  235. if j > high(args) + 1:
  236. errorHandler(rInvalidFormatStr, $(j))
  237. else:
  238. add(result, args[j-1])
  239. of 'n':
  240. add(result, softRnl)
  241. inc(i)
  242. of 'N':
  243. add(result, rnl)
  244. inc(i)
  245. else:
  246. errorHandler(rInvalidFormatStr, $(frmt[i]))
  247. var start = i
  248. while i < length:
  249. if frmt[i] != '$': inc(i)
  250. else: break
  251. if i - 1 >= start:
  252. add(result, substr(frmt, start, i - 1))
  253. assert(ropeInvariant(result))
  254. proc addf*(c: var Rope, frmt: FormatStr, args: openArray[Rope]) =
  255. ## shortcut for ``add(c, frmt % args)``.
  256. add(c, frmt % args)
  257. when true:
  258. template `~`*(r: string): Rope = r % []
  259. else:
  260. {.push stack_trace: off, line_trace: off.}
  261. proc `~`*(r: static[string]): Rope =
  262. # this is the new optimized "to rope" operator
  263. # the mnemonic is that `~` looks a bit like a rope :)
  264. var r {.global.} = r % []
  265. return r
  266. {.pop.}
  267. const
  268. bufSize = 1024 # 1 KB is reasonable
  269. proc equalsFile*(r: Rope, f: File): bool =
  270. ## returns true if the contents of the file `f` equal `r`.
  271. var
  272. buf: array[bufSize, char]
  273. bpos = buf.len
  274. blen = buf.len
  275. btotal = 0
  276. rtotal = 0
  277. for s in leaves(r):
  278. var spos = 0
  279. let slen = s.len
  280. rtotal += slen
  281. while spos < slen:
  282. if bpos == blen:
  283. # Read more data
  284. bpos = 0
  285. blen = readBuffer(f, addr(buf[0]), buf.len)
  286. btotal += blen
  287. if blen == 0: # no more data in file
  288. result = false
  289. return
  290. let n = min(blen - bpos, slen - spos)
  291. # TODO There's gotta be a better way of comparing here...
  292. if not equalMem(addr(buf[bpos]), cast[pointer](cast[int](cstring(s))+spos), n):
  293. result = false
  294. return
  295. spos += n
  296. bpos += n
  297. result = readBuffer(f, addr(buf[0]), 1) == 0 and
  298. btotal == rtotal # check that we've read all
  299. proc equalsFile*(r: Rope, filename: string): bool =
  300. ## returns true if the contents of the file `f` equal `r`. If `f` does not
  301. ## exist, false is returned.
  302. var f: File
  303. result = open(f, filename)
  304. if result:
  305. result = equalsFile(r, f)
  306. close(f)
  307. proc writeRopeIfNotEqual*(r: Rope, filename: string): bool =
  308. # returns true if overwritten
  309. if not equalsFile(r, filename):
  310. writeRope(r, filename)
  311. result = true
  312. else:
  313. result = false