123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401 |
- #
- #
- # Nim's Runtime Library
- # (c) Copyright 2010 Andreas Rumpf
- #
- # See the file "copying.txt", included in this
- # distribution, for details about the copyright.
- #
- ## This module contains support for a `rope`:idx: data type.
- ## Ropes can represent very long strings efficiently; in particular, concatenation
- ## is done in O(1) instead of O(n). They are essentially concatenation
- ## trees that are only flattened when converting to a native Nim
- ## string. The empty string is represented by `nil`. Ropes are immutable and
- ## subtrees can be shared without copying.
- ## Leaves can be cached for better memory efficiency at the cost of
- ## runtime efficiency.
- include system/inclrtl
- import std/streams
- when defined(nimPreviewSlimSystem):
- import std/[syncio, formatfloat, assertions]
- {.push debugger: off.} # the user does not want to trace a part
- # of the standard library!
- const
- countCacheMisses = false
- var
- cacheEnabled = false
- type
- Rope* {.acyclic.} = ref object
- ## A rope data type. The empty rope is represented by `nil`.
- left, right: Rope
- length: int
- data: string # not empty if a leaf
- # Note that the left and right pointers are not needed for leafs.
- # Leaves have relatively high memory overhead (~30 bytes on a 32
- # bit machine) and we produce many of them. This is why we cache and
- # share leafs across different rope trees.
- # To cache them they are inserted in another tree, a splay tree for best
- # performance. But for the caching tree we use the leaf's left and right
- # pointers.
- proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
- ## The rope's length.
- if a == nil: 0 else: a.length
- proc newRope(): Rope = new(result)
- proc newRope(data: string): Rope =
- new(result)
- result.length = len(data)
- result.data = data
- var
- cache {.threadvar.}: Rope # the root of the cache tree
- N {.threadvar.}: Rope # dummy rope needed for splay algorithm
- when countCacheMisses:
- var misses, hits: int
- proc splay(s: string, tree: Rope, cmpres: var int): Rope =
- var c: int
- var t = tree
- N.left = nil
- N.right = nil # reset to nil
- var le = N
- var r = N
- while true:
- c = cmp(s, t.data)
- if c < 0:
- if (t.left != nil) and (s < t.left.data):
- var y = t.left
- t.left = y.right
- y.right = t
- t = y
- if t.left == nil: break
- r.left = t
- r = t
- t = t.left
- elif c > 0:
- if (t.right != nil) and (s > t.right.data):
- var y = t.right
- t.right = y.left
- y.left = t
- t = y
- if t.right == nil: break
- le.right = t
- le = t
- t = t.right
- else:
- break
- cmpres = c
- le.right = t.left
- r.left = t.right
- t.left = N.right
- t.right = N.left
- result = t
- proc insertInCache(s: string, tree: Rope): Rope =
- var t = tree
- if t == nil:
- result = newRope(s)
- when countCacheMisses: inc(misses)
- return
- var cmp: int
- t = splay(s, t, cmp)
- if cmp == 0:
- # We get here if it's already in the Tree
- # Don't add it again
- result = t
- when countCacheMisses: inc(hits)
- else:
- when countCacheMisses: inc(misses)
- result = newRope(s)
- if cmp < 0:
- result.left = t.left
- result.right = t
- t.left = nil
- else:
- # i > t.item:
- result.right = t.right
- result.left = t
- t.right = nil
- proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
- ## Converts a string to a rope.
- runnableExamples:
- let r = rope("I'm a rope")
- doAssert $r == "I'm a rope"
- if s.len == 0:
- result = nil
- else:
- when nimvm:
- # No caching in VM context
- result = newRope(s)
- else:
- if cacheEnabled:
- result = insertInCache(s, cache)
- cache = result
- else:
- result = newRope(s)
- proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
- ## Converts an int to a rope.
- runnableExamples:
- let r = rope(429)
- doAssert $r == "429"
- result = rope($i)
- proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
- ## Converts a float to a rope.
- runnableExamples:
- let r = rope(4.29)
- doAssert $r == "4.29"
- result = rope($f)
- proc enableCache*() {.rtl, extern: "nro$1".} =
- ## Enables the caching of leaves. This reduces the memory footprint at
- ## the cost of runtime efficiency.
- cacheEnabled = true
- proc disableCache*() {.rtl, extern: "nro$1".} =
- ## The cache is discarded and disabled. The GC will reuse its used memory.
- cache = nil
- cacheEnabled = false
- proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
- ## The concatenation operator for ropes.
- runnableExamples:
- let r = rope("Hello, ") & rope("Nim!")
- doAssert $r == "Hello, Nim!"
- if a == nil:
- result = b
- elif b == nil:
- result = a
- else:
- result = newRope()
- result.length = a.length + b.length
- result.left = a
- result.right = b
- proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
- ## The concatenation operator for ropes.
- runnableExamples:
- let r = rope("Hello, ") & "Nim!"
- doAssert $r == "Hello, Nim!"
- result = a & rope(b)
- proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
- ## The concatenation operator for ropes.
- runnableExamples:
- let r = "Hello, " & rope("Nim!")
- doAssert $r == "Hello, Nim!"
- result = rope(a) & b
- proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
- ## The concatenation operator for an `openArray` of ropes.
- runnableExamples:
- let r = &[rope("Hello, "), rope("Nim"), rope("!")]
- doAssert $r == "Hello, Nim!"
- for item in a: result = result & item
- proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
- ## Adds `b` to the rope `a`.
- runnableExamples:
- var r = rope("Hello, ")
- r.add(rope("Nim!"))
- doAssert $r == "Hello, Nim!"
- a = a & b
- proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
- ## Adds `b` to the rope `a`.
- runnableExamples:
- var r = rope("Hello, ")
- r.add("Nim!")
- doAssert $r == "Hello, Nim!"
- a = a & b
- proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
- ## Returns the character at position `i` in the rope `r`. This is quite
- ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
- runnableExamples:
- let r = rope("Hello, Nim!")
- doAssert r[0] == 'H'
- doAssert r[7] == 'N'
- doAssert r[22] == '\0'
- var x = r
- var j = i
- if x == nil or i < 0 or i >= r.len: return
- while true:
- if x != nil and x.data.len > 0:
- # leaf
- return x.data[j]
- else:
- if x.left.length > j:
- x = x.left
- else:
- dec(j, x.left.length)
- x = x.right
- iterator leaves*(r: Rope): string =
- ## Iterates over any leaf string in the rope `r`.
- runnableExamples:
- let r = rope("Hello") & rope(", Nim!")
- let s = ["Hello", ", Nim!"]
- var index = 0
- for leave in r.leaves:
- doAssert leave == s[index]
- inc(index)
- if r != nil:
- var stack = @[r]
- while stack.len > 0:
- var it = stack.pop
- while it.left != nil:
- assert(it.right != nil)
- stack.add(it.right)
- it = it.left
- assert(it != nil)
- yield it.data
- iterator items*(r: Rope): char =
- ## Iterates over any character in the rope `r`.
- for s in leaves(r):
- for c in items(s): yield c
- proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
- ## Writes a rope to a file.
- for s in leaves(r): write(f, s)
- proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
- ## Writes a rope to a stream.
- for rs in leaves(r): write(s, rs)
- proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
- ## Converts a rope back to a string.
- result = newStringOfCap(r.len)
- for s in leaves(r): add(result, s)
- proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
- ## `%` substitution operator for ropes. Does not support the `$identifier`
- ## nor `${identifier}` notations.
- runnableExamples:
- let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
- doAssert $r1 == "Nim is a great language"
- let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
- doAssert $r2 == "Nim is a great language"
- let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
- doAssert $r3 == "Nim is a great language"
- var i = 0
- var length = len(frmt)
- result = nil
- var num = 0
- while i < length:
- if frmt[i] == '$':
- inc(i)
- case frmt[i]
- of '$':
- add(result, "$")
- inc(i)
- of '#':
- inc(i)
- add(result, args[num])
- inc(num)
- of '0'..'9':
- var j = 0
- while true:
- j = j * 10 + ord(frmt[i]) - ord('0')
- inc(i)
- if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
- add(result, args[j-1])
- of '{':
- inc(i)
- var j = 0
- while frmt[i] in {'0'..'9'}:
- j = j * 10 + ord(frmt[i]) - ord('0')
- inc(i)
- if frmt[i] == '}': inc(i)
- else: raise newException(ValueError, "invalid format string")
- add(result, args[j-1])
- else: raise newException(ValueError, "invalid format string")
- var start = i
- while i < length:
- if frmt[i] != '$': inc(i)
- else: break
- if i - 1 >= start:
- add(result, substr(frmt, start, i - 1))
- proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
- ## Shortcut for `add(c, frmt % args)`.
- runnableExamples:
- var r = rope("Dash: ")
- r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
- doAssert $r == "Dash: Nim is a great language"
- add(c, frmt % args)
- when not defined(js) and not defined(nimscript):
- const
- bufSize = 1024 # 1 KB is reasonable
- proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
- ## Returns true if the contents of the file `f` equal `r`.
- var
- buf: array[bufSize, char]
- bpos = buf.len
- blen = buf.len
- for s in leaves(r):
- var spos = 0
- let slen = s.len
- while spos < slen:
- if bpos == blen:
- # Read more data
- bpos = 0
- blen = readBuffer(f, addr(buf[0]), buf.len)
- if blen == 0: # no more data in file
- return false
- let n = min(blen - bpos, slen - spos)
- # TODO: There's gotta be a better way of comparing here...
- if not equalMem(addr(buf[bpos]),
- cast[pointer](cast[int](cstring(s)) + spos), n):
- return false
- spos += n
- bpos += n
- result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
- proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
- ## Returns true if the contents of the file `f` equal `r`. If `f` does not
- ## exist, false is returned.
- var f: File
- result = open(f, filename)
- if result:
- result = equalsFile(r, f)
- close(f)
- new(N) # init dummy node for splay algorithm
- {.pop.}
|