tamemfiles.nim 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. discard """
  2. output: '''
  3. loop 1a
  4. loop 1b; cols: @[1, x]
  5. loop 1c
  6. loop 1d
  7. loop 1a
  8. loop 1b; cols: @[2, y]
  9. loop 1c
  10. loop 1d
  11. '''
  12. cmd: "nim c --gc:arc $file"
  13. """
  14. # bug #13596
  15. import tables, memfiles, strutils, os
  type
    Splitr* = tuple
      # Delimiter configuration built by `initSplitr` and read by `split`.
      repeat: bool       # collapse runs of delimiters into one separator
      chrDlm: char       # the delimiter used when there is a single one
      setDlm: set[char]  # every distinct delimiter character
      n: int             # number of distinct delimiter characters
    csize = uint         # local size type used for the `memchr` length
  # Binding to C `memchr` from <string.h>: scans the first `n` bytes at `s`
  # for byte `c` and returns a pointer to the first match, or nil if none.
  proc cmemchr*(s: pointer; c: char; n: csize): pointer {.
    header: "<string.h>", importc: "memchr".}
  proc `-!`*(p, q: pointer): int {.inline.} =
    ## Signed byte distance `p - q` between two addresses. Uses the wrapping
    ## `-%` operator, so the subtraction itself never raises on overflow.
    let minuend = cast[int](p)
    let subtrahend = cast[int](q)
    result = minuend -% subtrahend
  proc `+!`*(p: pointer, i: int): pointer {.inline.} =
    ## Address `i` bytes after `p` (before it when `i` is negative). Uses the
    ## wrapping `+%` operator, so the addition never raises on overflow.
    let address = cast[int](p)
    result = cast[pointer](address +% i)
  proc `+!`*(p: pointer, i: uint64): pointer {.inline.} =
    ## Address `i` bytes after `p`, for an unsigned 64-bit offset. Unsigned
    ## addition wraps around instead of raising.
    let address = cast[uint64](p)
    result = cast[pointer](address + i)
  proc charEq(x, c: char): bool {.inline.} =
    ## Separator test for a single-character delimiter: true when `x` is `c`.
    result = (c == x)
  proc initSplitr*(delim: string): Splitr =
    ## Builds a `Splitr` from a delimiter specification.
    ##
    ## * `"white"` selects space, tab and newline as delimiters, with space
    ##   as `chrDlm` and `repeat` turned on.
    ## * Otherwise every distinct character of `delim` becomes a delimiter;
    ##   a character that occurs more than once turns `repeat` on.
    ##
    ## `n` is the number of distinct delimiters. `chrDlm` is only set when
    ## there is exactly one, which lets the splitter use `memchr`.
    result = default(Splitr)
    if delim != "white":
      for ch in delim:
        if ch notin result.setDlm:
          result.setDlm.incl(ch)
          inc(result.n)
        else:
          # A repeated character asks for runs of delimiters to be folded.
          result.repeat = true
      if result.n == 1:
        result.chrDlm = delim[0]
    else:
      # User can use any other permutation if needed.
      result.repeat = true
      result.chrDlm = ' '
      result.setDlm = {' ', '\t', '\n'}
      result.n = result.setDlm.card
  proc hash(x: MemSlice): int =
    ## Hash used for `MemSlice` table keys. It ignores `x` and returns the
    ## same constant for every slice, so all keys share one hash value.
    result = 55542
  template defSplit[T](slc: T, fs: var seq[MemSlice], n: int, repeat: bool,
                       sep: untyped, nextSep: untyped,
                       isSep: untyped) {.dirty.} =
    ## Shared body of the split procs: cuts `slc` into fields and stores them
    ## in `fs`.
    ##
    ## The template is `dirty`: `result` and `return` belong to the proc that
    ## instantiates it, and the locals `b`, `eob` and `e` are declared in that
    ## proc's scope. `result` is used as the index of the next field, so the
    ## instantiating proc must return `int` and enter with `result == 0`. On
    ## exit `result` is the number of fields and `fs.len == result`.
    ##
    ## * `slc`     - slice to split; only `slc.data` and `slc.size` are read.
    ## * `fs`      - output buffer; resized here, fields point into `slc`.
    ## * `n`       - maximum number of fields; `n < 1` means unlimited. When
    ##               the limit is hit the last field holds the remainder.
    ## * `repeat`  - fold runs of separators into one and skip leading ones.
    ##               An input made only of separators yields zero fields.
    ## * `sep`     - separator value passed through to `nextSep` and `isSep`.
    ## * `nextSep` - `nextSep(p, sep, len)`: pointer to the next separator in
    ##               the `len` bytes at `p`, or nil when there is none.
    ## * `isSep`   - `isSep(ch, sep)`: whether `ch` is a separator.
    fs.setLen(if n < 1: 16 else: n)
    var b = slc.data                    # start of the current field
    var eob = b +! slc.size             # one past the last byte of the slice
    while repeat and eob -! b > 0 and isSep((cast[cstring](b))[0], sep):
      b = b +! 1                        # skip leading separators
      if b == eob: fs.setLen(0); return # nothing but separators
    var e = nextSep(b, sep, (eob -! b).csize)
    while e != nil:
      if n < 1:                         # Unbounded msplit
        if result == fs.len - 1:        # Expand capacity
          fs.setLen(if fs.len < 512: 2*fs.len else: fs.len + 512)
      elif result == n - 1:             # Need 1 more slot for final field
        break
      fs[result].data = b
      fs[result].size = e -! b
      result += 1
      # Fold a run of separators by peeking at the byte after `e`. The peek
      # is only in bounds while at least two bytes remain from `e`, hence
      # `> 1`: a `> 0` guard would read one byte past the end of the slice
      # whenever the slice ends with a separator.
      while repeat and eob -! e > 1 and isSep((cast[cstring](e))[1], sep):
        e = e +! 1
      b = e +! 1
      if eob -! b <= 0:                 # separator was the last byte
        b = eob
        break
      e = nextSep(b, sep, (eob -! b).csize)
    # Final field: the remainder after the last separator. Without `repeat`
    # an empty trailing field is kept; with `repeat` it is dropped.
    if not repeat or eob -! b > 0:
      fs[result].data = b
      fs[result].size = eob -! b
      result += 1
    fs.setLen(result)
  proc msplit*(s: MemSlice; fs: var seq[MemSlice]; sep = ' '; n = 0;
               repeat = false): int =
    ## Splits `s` on the single character `sep`, storing the fields in `fs`
    ## and returning how many were stored.
    ##
    ## `n < 1` means no limit on the number of fields; `repeat` folds runs of
    ## `sep` into one separator. Separators are located with `cmemchr`.
    result = 0  # `defSplit` is dirty and uses `result` as the field index
    defSplit(s, fs, n, repeat, sep, cmemchr, charEq)
  proc split*(s: Splitr; line: MemSlice; cols: var seq[MemSlice];
              n = 0) {.inline.} =
    ## Splits `line` into `cols` using the settings in `s`; the field count
    ## returned by `msplit` is dropped. Only `s.chrDlm` and `s.repeat` are
    ## consulted here, `s.setDlm` is not.
    discard line.msplit(cols, sep = s.chrDlm, n = n, repeat = s.repeat)
  81. ########################################################################
  82. # Using lines instead of memSlices & split instead of splitr.split seems
  83. # to mask the arc problem, as does simplifying `Table` to `seq[char]`.
  84. proc load(path: string, delim=" "): Table[MemSlice, seq[char]] =
  ## Memory-maps the file at `path`, splits every slice yielded by
  ## `memSlices` into at most two columns on `delim`, and returns a table
  ## keyed by the second column. Each value is a one-element seq holding the
  ## first byte of the first column. Progress markers go to stderr.
  ##
  ## `cols[0]` and `cols[1]` are indexed without a length check, so every
  ## line is expected to contain at least two columns.
  ##
  ## NOTE(review): `f` is never closed in this proc. The keys stored in
  ## `result` are the MemSlices produced by `split` from the mapped lines,
  ## so closing here would presumably leave them dangling - confirm before
  ## adding a close.
  85. result = default(Table[MemSlice, seq[char]])
  86. let f = memfiles.open(path)
  87. let splitr = initSplitr(delim)
  88. var cols: seq[MemSlice] = @[ ] # re-used seq buffer, refilled by each split call
  89. var nwSq = newSeqOfCap[char](1) # re-used seq value, assigned into the table each iteration
  90. nwSq.setLen 1
  91. for line in memSlices(f, eat='\0'):
  92. stderr.write "loop 1a\n"
  # n = 2: at most two fields; the second one keeps the rest of the line.
  93. splitr.split(line, cols, 2)
  94. stderr.write "loop 1b; cols: ", cols, "\n"
  # View the first column's bytes as a cstring; only cs[0] is read below.
  95. let cs = cast[cstring](cols[0].data)
  96. stderr.write "loop 1c\n" #..reports exception here, but
  97. nwSq[0] = cs[0] #..actually doing out of bounds here
  98. stderr.write "loop 1d\n"
  # Key is the second column's slice; the value assigned is nwSq.
  99. result[cols[1]] = nwSq
  # Run the repro on `testfile.txt` in the application directory; the
  # returned table is not needed, only the stderr trace.
  discard load(joinPath(getAppDir(), "testfile.txt"))