memfiles.nim 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Nim Contributors
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## :Authors: Zahary Karadjov, Andreas Rumpf
  10. ##
  11. ## This module provides support for `memory mapped files`:idx:
  12. ## (Posix's `mmap`:idx:) on the different operating systems.
  13. ##
  14. ## It also provides some fast iterators over lines in text files (or
  15. ## other "line-like", variable length, delimited records).
  16. when defined(windows):
  17. import winlean
  18. elif defined(posix):
  19. import posix
  20. else:
  21. {.error: "the memfiles module is not supported on your operating system!".}
  22. import os
  23. type
  24. MemFile* = object ## represents a memory mapped file
  25. mem*: pointer ## a pointer to the memory mapped file. The pointer
  26. ## can be used directly to change the contents of the
  27. ## file, if it was opened with write access.
  28. size*: int ## size of the memory mapped file
  29. when defined(windows):
  30. fHandle: Handle
  31. mapHandle: Handle
  32. wasOpened: bool ## only close if wasOpened
  33. else:
  34. handle: cint
  35. {.deprecated: [TMemFile: MemFile].}
  36. proc mapMem*(m: var MemFile, mode: FileMode = fmRead,
  37. mappedSize = -1, offset = 0): pointer =
  38. ## returns a pointer to a mapped portion of MemFile `m`
  39. ##
  40. ## ``mappedSize`` of ``-1`` maps to the whole file, and
  41. ## ``offset`` must be multiples of the PAGE SIZE of your OS
  42. var readonly = mode == fmRead
  43. when defined(windows):
  44. result = mapViewOfFileEx(
  45. m.mapHandle,
  46. if readonly: FILE_MAP_READ else: FILE_MAP_WRITE,
  47. int32(offset shr 32),
  48. int32(offset and 0xffffffff),
  49. if mappedSize == -1: 0 else: mappedSize,
  50. nil)
  51. if result == nil:
  52. raiseOSError(osLastError())
  53. else:
  54. assert mappedSize > 0
  55. result = mmap(
  56. nil,
  57. mappedSize,
  58. if readonly: PROT_READ else: PROT_READ or PROT_WRITE,
  59. if readonly: (MAP_PRIVATE or MAP_POPULATE) else: (MAP_SHARED or MAP_POPULATE),
  60. m.handle, offset)
  61. if result == cast[pointer](MAP_FAILED):
  62. raiseOSError(osLastError())
  63. proc unmapMem*(f: var MemFile, p: pointer, size: int) =
  64. ## unmaps the memory region ``(p, <p+size)`` of the mapped file `f`.
  65. ## All changes are written back to the file system, if `f` was opened
  66. ## with write access.
  67. ##
  68. ## ``size`` must be of exactly the size that was requested
  69. ## via ``mapMem``.
  70. when defined(windows):
  71. if unmapViewOfFile(p) == 0: raiseOSError(osLastError())
  72. else:
  73. if munmap(p, size) != 0: raiseOSError(osLastError())
  74. proc open*(filename: string, mode: FileMode = fmRead,
  75. mappedSize = -1, offset = 0, newFileSize = -1,
  76. allowRemap = false): MemFile =
  77. ## opens a memory mapped file. If this fails, ``EOS`` is raised.
  78. ##
  79. ## ``newFileSize`` can only be set if the file does not exist and is opened
  80. ## with write access (e.g., with fmReadWrite).
  81. ##
  82. ##``mappedSize`` and ``offset``
  83. ## can be used to map only a slice of the file.
  84. ##
  85. ## ``offset`` must be multiples of the PAGE SIZE of your OS
  86. ## (usually 4K or 8K but is unique to your OS)
  87. ##
  88. ## ``allowRemap`` only needs to be true if you want to call ``mapMem`` on
  89. ## the resulting MemFile; else file handles are not kept open.
  90. ##
  91. ## Example:
  92. ##
  93. ## .. code-block:: nim
  94. ## var
  95. ## mm, mm_full, mm_half: MemFile
  96. ##
  97. ## mm = memfiles.open("/tmp/test.mmap", mode = fmWrite, newFileSize = 1024) # Create a new file
  98. ## mm.close()
  99. ##
  100. ## # Read the whole file, would fail if newFileSize was set
  101. ## mm_full = memfiles.open("/tmp/test.mmap", mode = fmReadWrite, mappedSize = -1)
  102. ##
  103. ## # Read the first 512 bytes
  104. ## mm_half = memfiles.open("/tmp/test.mmap", mode = fmReadWrite, mappedSize = 512)
  105. # The file can be resized only when write mode is used:
  106. assert newFileSize == -1 or mode != fmRead
  107. var readonly = mode == fmRead
  108. template rollback =
  109. result.mem = nil
  110. result.size = 0
  111. when defined(windows):
  112. template fail(errCode: OSErrorCode, msg: untyped) =
  113. rollback()
  114. if result.fHandle != 0: discard closeHandle(result.fHandle)
  115. if result.mapHandle != 0: discard closeHandle(result.mapHandle)
  116. raiseOSError(errCode)
  117. # return false
  118. #raise newException(EIO, msg)
  119. template callCreateFile(winApiProc, filename): untyped =
  120. winApiProc(
  121. filename,
  122. # GENERIC_ALL != (GENERIC_READ or GENERIC_WRITE)
  123. if readonly: GENERIC_READ else: GENERIC_READ or GENERIC_WRITE,
  124. FILE_SHARE_READ,
  125. nil,
  126. if newFileSize != -1: CREATE_ALWAYS else: OPEN_EXISTING,
  127. if readonly: FILE_ATTRIBUTE_READONLY else: FILE_ATTRIBUTE_TEMPORARY,
  128. 0)
  129. when useWinUnicode:
  130. result.fHandle = callCreateFile(createFileW, newWideCString(filename))
  131. else:
  132. result.fHandle = callCreateFile(createFileA, filename)
  133. if result.fHandle == INVALID_HANDLE_VALUE:
  134. fail(osLastError(), "error opening file")
  135. if newFileSize != -1:
  136. var
  137. sizeHigh = int32(newFileSize shr 32)
  138. sizeLow = int32(newFileSize and 0xffffffff)
  139. var status = setFilePointer(result.fHandle, sizeLow, addr(sizeHigh),
  140. FILE_BEGIN)
  141. let lastErr = osLastError()
  142. if (status == INVALID_SET_FILE_POINTER and lastErr.int32 != NO_ERROR) or
  143. (setEndOfFile(result.fHandle) == 0):
  144. fail(lastErr, "error setting file size")
  145. # since the strings are always 'nil', we simply always call
  146. # CreateFileMappingW which should be slightly faster anyway:
  147. result.mapHandle = createFileMappingW(
  148. result.fHandle, nil,
  149. if readonly: PAGE_READONLY else: PAGE_READWRITE,
  150. 0, 0, nil)
  151. if result.mapHandle == 0:
  152. fail(osLastError(), "error creating mapping")
  153. result.mem = mapViewOfFileEx(
  154. result.mapHandle,
  155. if readonly: FILE_MAP_READ else: FILE_MAP_WRITE,
  156. int32(offset shr 32),
  157. int32(offset and 0xffffffff),
  158. if mappedSize == -1: 0 else: mappedSize,
  159. nil)
  160. if result.mem == nil:
  161. fail(osLastError(), "error mapping view")
  162. var hi, low: int32
  163. low = getFileSize(result.fHandle, addr(hi))
  164. if low == INVALID_FILE_SIZE:
  165. fail(osLastError(), "error getting file size")
  166. else:
  167. var fileSize = (int64(hi) shl 32) or int64(uint32(low))
  168. if mappedSize != -1: result.size = min(fileSize, mappedSize).int
  169. else: result.size = fileSize.int
  170. result.wasOpened = true
  171. if not allowRemap and result.fHandle != INVALID_HANDLE_VALUE:
  172. if closeHandle(result.fHandle) == 0:
  173. result.fHandle = INVALID_HANDLE_VALUE
  174. else:
  175. template fail(errCode: OSErrorCode, msg: string) =
  176. rollback()
  177. if result.handle != -1: discard close(result.handle)
  178. raiseOSError(errCode)
  179. var flags = if readonly: O_RDONLY else: O_RDWR
  180. if newFileSize != -1:
  181. flags = flags or O_CREAT or O_TRUNC
  182. var permissions_mode = S_IRUSR or S_IWUSR
  183. result.handle = open(filename, flags, permissions_mode)
  184. else:
  185. result.handle = open(filename, flags)
  186. if result.handle == -1:
  187. # XXX: errno is supposed to be set here
  188. # Is there an exception that wraps it?
  189. fail(osLastError(), "error opening file")
  190. if newFileSize != -1:
  191. if ftruncate(result.handle, newFileSize) == -1:
  192. fail(osLastError(), "error setting file size")
  193. if mappedSize != -1:
  194. result.size = mappedSize
  195. else:
  196. var stat: Stat
  197. if fstat(result.handle, stat) != -1:
  198. # XXX: Hmm, this could be unsafe
  199. # Why is mmap taking int anyway?
  200. result.size = int(stat.st_size)
  201. else:
  202. fail(osLastError(), "error getting file size")
  203. result.mem = mmap(
  204. nil,
  205. result.size,
  206. if readonly: PROT_READ else: PROT_READ or PROT_WRITE,
  207. if readonly: (MAP_PRIVATE or MAP_POPULATE) else: (MAP_SHARED or MAP_POPULATE),
  208. result.handle,
  209. offset)
  210. if result.mem == cast[pointer](MAP_FAILED):
  211. fail(osLastError(), "file mapping failed")
  212. if not allowRemap and result.handle != -1:
  213. if close(result.handle) == 0:
  214. result.handle = -1
  215. proc close*(f: var MemFile) =
  216. ## closes the memory mapped file `f`. All changes are written back to the
  217. ## file system, if `f` was opened with write access.
  218. var error = false
  219. var lastErr: OSErrorCode
  220. when defined(windows):
  221. if f.wasOpened:
  222. error = unmapViewOfFile(f.mem) == 0
  223. lastErr = osLastError()
  224. error = (closeHandle(f.mapHandle) == 0) or error
  225. if f.fHandle != INVALID_HANDLE_VALUE:
  226. error = (closeHandle(f.fHandle) == 0) or error
  227. else:
  228. error = munmap(f.mem, f.size) != 0
  229. lastErr = osLastError()
  230. if f.handle != -1:
  231. error = (close(f.handle) != 0) or error
  232. f.size = 0
  233. f.mem = nil
  234. when defined(windows):
  235. f.fHandle = 0
  236. f.mapHandle = 0
  237. f.wasOpened = false
  238. else:
  239. f.handle = -1
  240. if error: raiseOSError(lastErr)
  241. type MemSlice* = object ## represent slice of a MemFile for iteration over delimited lines/records
  242. data*: pointer
  243. size*: int
  244. proc `==`*(x, y: MemSlice): bool =
  245. ## Compare a pair of MemSlice for strict equality.
  246. proc memcmp(a, b: pointer, n:int):int {.importc: "memcmp",header: "string.h".}
  247. result = (x.size == y.size and memcmp(x.data, y.data, x.size) == 0)
  248. proc `$`*(ms: MemSlice): string {.inline.} =
  249. ## Return a Nim string built from a MemSlice.
  250. var buf = newString(ms.size)
  251. copyMem(addr(buf[0]), ms.data, ms.size)
  252. buf[ms.size] = '\0'
  253. result = buf
  254. iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
  255. ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`.
  256. ##
  257. ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l)
  258. ## style on on a line-by-line basis. I.e., not every line needs the same ending.
  259. ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines
  260. ## are not supported as a third option for each line. Such archaic MacOS9
  261. ## files can be handled by passing delim='\\r', eat='\\0', though.
  262. ##
  263. ## Delimiters are not part of the returned slice. A final, unterminated line
  264. ## or record is returned just like any other.
  265. ##
  266. ## Non-default delimiters can be passed to allow iteration over other sorts
  267. ## of "line-like" variable length records. Pass eat='\\0' to be strictly
  268. ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not
  269. ## supported.)
  270. ##
  271. ## This zero copy, memchr-limited interface is probably the fastest way to
  272. ## iterate over line-like records in a file. However, returned (data,size)
  273. ## objects are not Nim strings, bounds checked Nim arrays, or even terminated
  274. ## C strings. So, care is required to access the data (e.g., think C mem*
  275. ## functions, not str* functions).
  276. ##
  277. ## Example:
  278. ##
  279. ## .. code-block:: nim
  280. ## var count = 0
  281. ## for slice in memSlices(memfiles.open("foo")):
  282. ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#':
  283. ## inc(count)
  284. ## echo count
  285. proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
  286. importc: "memchr", header: "<string.h>" .}
  287. proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q)
  288. var ms: MemSlice
  289. var ending: pointer
  290. ms.data = mfile.mem
  291. var remaining = mfile.size
  292. while remaining > 0:
  293. ending = c_memchr(ms.data, delim, remaining)
  294. if ending == nil: # unterminated final slice
  295. ms.size = remaining # Weird case..check eat?
  296. yield ms
  297. break
  298. ms.size = ending -! ms.data # delim is NOT included
  299. if eat != '\0' and ms.size > 0 and cast[cstring](ms.data)[ms.size - 1] == eat:
  300. dec(ms.size) # trim pre-delim char
  301. yield ms
  302. ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim
  303. remaining = mfile.size - (ms.data -! mfile.mem)
  304. iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} =
  305. ## Replace contents of passed buffer with each new line, like
  306. ## `readLine(File) <system.html#readLine,File,TaintedString>`_.
  307. ## `delim`, `eat`, and delimiting logic is exactly as for
  308. ## `memSlices <#memSlices>`_, but Nim strings are returned.
  309. ##
  310. ## Example:
  311. ##
  312. ## .. code-block:: nim
  313. ## var buffer: TaintedString = ""
  314. ## for line in lines(memfiles.open("foo"), buffer):
  315. ## echo line
  316. for ms in memSlices(mfile, delim, eat):
  317. buf.setLen(ms.size)
  318. copyMem(addr(buf[0]), ms.data, ms.size)
  319. buf[ms.size] = '\0'
  320. yield buf
  321. iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} =
  322. ## Return each line in a file as a Nim string, like
  323. ## `lines(File) <system.html#lines.i,File>`_.
  324. ## `delim`, `eat`, and delimiting logic is exactly as for
  325. ## `memSlices <#memSlices>`_, but Nim strings are returned.
  326. ##
  327. ## Example:
  328. ##
  329. ## .. code-block:: nim
  330. ## for line in lines(memfiles.open("foo")):
  331. ## echo line
  332. var buf = TaintedString(newStringOfCap(80))
  333. for line in lines(mfile, buf, delim, eat):
  334. yield buf