rodfiles.nim 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. #
  2. #
  3. # The Nim Compiler
  4. # (c) Copyright 2020 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Low level binary format used by the compiler to store and load various AST
  10. ## and related data.
  11. ##
  12. ## NB: this is incredibly low level; if you're interested in how the
  13. ## compiler works rather than in the storage format, you're probably looking
  14. ## for the `ic` or `packed_ast` modules to understand the logical format.
  15. from std/typetraits import supportsCopyMem
  16. when defined(nimPreviewSlimSystem):
  17. import std/[syncio, assertions]
  18. import std / tables
  19. ## Overview
  20. ## ========
  21. ## `RodFile` represents a Rod File (versioned binary format), and the
  22. ## associated data for common interactions such as IO and error tracking
  23. ## (`RodFileError`). The file format is broken up into sections (`RodSection`)
  24. ## and preceded by a header (see: `cookie`). The precise layout, section
  25. ## ordering and data following the section are determined by the user. See
  26. ## `ic.loadRodFile`.
  27. ##
  28. ## A basic but "wrong" example of the lifecycle:
  29. ## ---------------------------------------------
  30. ## 1. `create` or `open` - create a new one or open an existing
  31. ## 2. `storeHeader` - header info
  32. ## 3. `storePrim` or `storeSeq` - save your stuff
  33. ## 4. `close` - and we're done
  34. ##
  35. ## Now read the bits below to understand what's missing.
  36. ##
  37. ## ### Issues with the Example
  38. ## Missing Sections:
  39. ## This is a low level API, so headers and sections need to be stored and
  40. ## loaded by the user, see `storeHeader` & `loadHeader` and `storeSection` &
  41. ## `loadSection`, respectively.
  42. ##
  43. ## No Error Handling:
  44. ## The API is centered around IO and prone to error, each operation checks or
  45. ## sets the `RodFile.err` field. A user of this API needs to handle these
  46. ## appropriately.
  47. ##
  48. ## API Notes
  49. ## =========
  50. ##
  51. ## Valid inputs for Rod files
  52. ## --------------------------
  53. ## ASTs, hopes, dreams, and anything as long as it and any children it may have
  54. ## support `copyMem`. This means anything that is not a pointer and that does not contain a pointer. At a glance these are:
  55. ## * string
  56. ## * objects & tuples (fields are recursed)
  57. ## * sequences AKA `seq[T]`
  58. ##
  59. ## Note on error handling style
  60. ## ----------------------------
  61. ## A flag based approach is used where operations no-op in case of a
  62. ## preexisting error and set the flag if they encounter one.
  63. ##
  64. ## Misc
  65. ## ----
  66. ## * 'Prim' is short for 'primitive', as in a non-sequence type
  type
    RodSection* = enum
      ## Tags for the sections of a rod file. `storeSection` writes the raw
      ## value of the tag and asserts that tags are stored in ascending
      ## order, so the order of the members below must not be changed.
      versionSection
      configSection
      stringsSection
      checkSumsSection
      depsSection
      numbersSection
      exportsSection
      hiddenSection
      reexportsSection
      compilerProcsSection
      trmacrosSection
      convertersSection
      methodsSection
      pureEnumsSection
      toReplaySection
      topLevelSection
      bodiesSection
      symsSection
      typesSection
      typeInstCacheSection
      procInstCacheSection
      attachedOpsSection
      methodsPerGenericTypeSection
      enumToStringProcsSection
      methodsPerTypeSection
      dispatchersSection
      typeInfoSection   ## required by the backend
      backendFlagsSection
      aliveSymsSection  ## beware, this is stored in a `.alivesyms` file.
      sideChannelSection
      namespaceSection
      symnamesSection

    RodFileError* = enum
      ## Error states a `RodFile` can be in; `ok` means "no error so far".
      ok
      tooBig
      cannotOpen
      ioFailure
      wrongHeader
      wrongSection
      configMismatch
      includeFileChanged

    RodFile* = object
      ## A rod file handle together with its error-tracking state.
      f*: File                    ## the underlying file handle
      currentSection*: RodSection ## for error checking
      err*: RodFileError          ## little experiment to see if this works
                                  ## better than exceptions.
  const
    RodVersion = 2
      ## Version number of the binary format; it is the last cookie byte.
    defaultCookie = [
      byte(0), byte('R'), byte('O'), byte('D'), # magic bytes
      byte(sizeof(int)*8),                      # bit width of `int`
      byte(system.cpuEndian),                   # byte order of the CPU
      byte(0),
      byte(RodVersion)                          # format version
    ]
      ## Default header written by `storeHeader` and expected by
      ## `loadHeader`.
  proc setError(f: var RodFile; err: RodFileError) {.inline.} =
    ## Puts `f` into the error state `err` by overwriting `f.err`.
    # Raising was considered instead of a flag and is kept for reference:
    #raise newException(IOError, "IO error")
    f.err = err
  proc storePrim*(f: var RodFile; s: string) =
    ## Stores a string: an `int32` length prefix followed by the raw bytes
    ## of `s`, so that it can be retrieved later.
    ##
    ## Does nothing if `f` is already in an error state. Sets `tooBig` when
    ## `s.len >= high(int32)` and `ioFailure` when a write comes up short.
    if f.err != ok: return
    let byteCount = s.len
    if byteCount >= high(int32):
      setError f, tooBig
      return
    var prefix = int32(byteCount)
    if writeBuffer(f.f, addr prefix, sizeof(prefix)) != sizeof(prefix):
      setError f, ioFailure
      return
    # An empty string has no first byte to take the address of.
    if byteCount == 0: return
    if writeBuffer(f.f, unsafeAddr(s[0]), byteCount) != byteCount:
      setError f, ioFailure
  proc storePrim*[T](f: var RodFile; x: T) =
    ## Stores a value of a non-sequence/string type `T`.
    ##
    ## * If `T` supports `copyMem`, the raw bytes of `x` are written.
    ## * Otherwise, if `T` is a tuple, every field is written with
    ##   `storePrim`.
    ## * Otherwise, if `T` is an object, `seq` fields are written with
    ##   `storeSeq` and all other fields with `storePrim`.
    ## * Any other type is rejected at compile time.
    ##
    ## No type information is written -- the user from context will need to
    ## know which `T` to load. Does nothing if `f` is already in an error
    ## state; a short write sets `ioFailure`.
    if f.err != ok: return
    when supportsCopyMem(T):
      let written = writeBuffer(f.f, unsafeAddr(x), sizeof(x))
      if written != sizeof(x):
        setError f, ioFailure
    elif T is tuple:
      for member in fields(x):
        storePrim(f, member)
    elif T is object:
      for member in fields(x):
        when member is seq:
          storeSeq(f, member)
        else:
          storePrim(f, member)
    else:
      {.error: "unsupported type for 'storePrim'".}
  proc storeSeq*[T](f: var RodFile; s: seq[T]) =
    ## Stores a sequence of `T`s: an `int32` element count as a prefix for
    ## later retrieval, followed by every element written with `storePrim`.
    ##
    ## Does nothing if `f` is already in an error state. Sets `tooBig` when
    ## `s.len >= high(int32)` and `ioFailure` when the prefix cannot be
    ## written completely.
    if f.err != ok: return
    if s.len >= high(int32):
      setError f, tooBig
      return
    var count = int32(s.len)
    if writeBuffer(f.f, addr count, sizeof(count)) == sizeof(count):
      for item in s:
        storePrim(f, item)
    else:
      setError f, ioFailure
  proc storeOrderedTable*[K, T](f: var RodFile; s: OrderedTable[K, T]) =
    ## Stores the values of `s`: an `int32` entry count as a prefix, followed
    ## by every value written with `storePrim` in the table's iteration
    ## order. The keys themselves are not written.
    ##
    ## Does nothing if `f` is already in an error state. Sets `tooBig` when
    ## `s.len >= high(int32)` and `ioFailure` when the prefix cannot be
    ## written completely.
    if f.err != ok: return
    if s.len >= high(int32):
      setError f, tooBig
      return
    var count = int32(s.len)
    if writeBuffer(f.f, addr count, sizeof(count)) == sizeof(count):
      for value in s.values:
        storePrim(f, value)
    else:
      setError f, ioFailure
  proc loadPrim*(f: var RodFile; s: var string) =
    ## Reads a string into `s`; the length was stored as an `int32` prefix
    ## in front of the raw bytes.
    ##
    ## Does nothing if `f` is already in an error state. Sets `ioFailure`
    ## when a read comes up short or when the length prefix is negative.
    if f.err != ok: return
    var lenPrefix = int32(0)
    if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix):
      setError f, ioFailure
    elif lenPrefix < 0:
      # The string `storePrim` never writes a negative length, so the data
      # is corrupt. Report it through the error flag instead of letting
      # `newString` raise on a negative size.
      setError f, ioFailure
    else:
      s = newString(lenPrefix)
      if lenPrefix > 0:
        # `s` is mutable here, so a plain `addr` is sufficient.
        if readBuffer(f.f, addr s[0], s.len) != s.len:
          setError f, ioFailure
  proc loadPrim*[T](f: var RodFile; x: var T) =
    ## Loads a value of a non-sequence/string type `T` into `x`.
    ##
    ## * If `T` supports `copyMem`, the raw bytes of `x` are read.
    ## * Otherwise, if `T` is a tuple, every field is read with `loadPrim`.
    ## * Otherwise, if `T` is an object, `seq` fields are read with
    ##   `loadSeq` and all other fields with `loadPrim`.
    ## * Any other type is rejected at compile time.
    ##
    ## Does nothing if `f` is already in an error state; a short read sets
    ## `ioFailure`.
    if f.err != ok: return
    when supportsCopyMem(T):
      let got = readBuffer(f.f, addr(x), sizeof(x))
      if got != sizeof(x):
        setError f, ioFailure
    elif T is tuple:
      for member in fields(x):
        loadPrim(f, member)
    elif T is object:
      for member in fields(x):
        when member is seq:
          loadSeq(f, member)
        else:
          loadPrim(f, member)
    else:
      {.error: "unsupported type for 'loadPrim'".}
  proc loadSeq*[T](f: var RodFile; s: var seq[T]) =
    ## Reads a sequence into `s`: an `int32` element count followed by that
    ## many elements, each read with `loadPrim`. `T` must therefore be a type
    ## that `loadPrim` accepts.
    ##
    ## Does nothing if `f` is already in an error state. Sets `ioFailure`
    ## when the prefix cannot be read completely or is negative. If reading
    ## an element fails, the remaining elements of `s` keep their default
    ## value.
    if f.err != ok: return
    var lenPrefix = int32(0)
    if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix):
      setError f, ioFailure
    elif lenPrefix < 0:
      # `storeSeq` never writes a negative count, so the data is corrupt.
      # Report it through the error flag instead of letting `newSeq` raise
      # on a negative size.
      setError f, ioFailure
    else:
      s = newSeq[T](lenPrefix)
      for i in 0..<lenPrefix:
        loadPrim(f, s[i])
        # Every further `loadPrim` would be a no-op once an error is set, so
        # stop instead of spinning through the rest of the count.
        if f.err != ok: break
  proc loadOrderedTable*[K, T](f: var RodFile; s: var OrderedTable[K, T]) =
    ## Reads a table into `s`: an `int32` entry count followed by that many
    ## values, each read with `loadPrim`. Keys are not part of the stored
    ## data; every value `x` is inserted under `x.id`, so `T` needs an `id`
    ## field usable as `K`.
    ##
    ## Does nothing if `f` is already in an error state. Sets `ioFailure`
    ## when the prefix cannot be read completely or is negative. If reading
    ## a value fails, loading stops and that value is not inserted; `s` then
    ## holds only the entries read successfully so far.
    if f.err != ok: return
    var lenPrefix = int32(0)
    if readBuffer(f.f, addr lenPrefix, sizeof(lenPrefix)) != sizeof(lenPrefix):
      setError f, ioFailure
    elif lenPrefix < 0:
      # `storeOrderedTable` never writes a negative count, so the data is
      # corrupt; flag it like any other unreadable input.
      setError f, ioFailure
    else:
      s = initOrderedTable[K, T](lenPrefix)
      for _ in 0..<lenPrefix:
        var x = default T
        loadPrim(f, x)
        # Do not insert a default or partially read value, and do not keep
        # looping over a count that can no longer be satisfied.
        if f.err != ok: break
        s[x.id] = x
  proc storeHeader*(f: var RodFile; cookie = defaultCookie) =
    ## Writes the bytes of `cookie` as the header of the file.
    ##
    ## Does nothing if `f` is already in an error state; sets `ioFailure`
    ## when fewer than `cookie.len` bytes could be written.
    if f.err != ok: return
    let written = f.f.writeBytes(cookie, 0, cookie.len)
    if written != cookie.len:
      setError f, ioFailure
  proc loadHeader*(f: var RodFile; cookie = defaultCookie) =
    ## Reads a header of `cookie.len` bytes and compares it with `cookie`.
    ##
    ## Does nothing if `f` is already in an error state. Sets `ioFailure`
    ## when the header cannot be read completely and `wrongHeader` when the
    ## bytes read differ from `cookie`.
    if f.err != ok: return
    var seen = default(array[cookie.len, byte])
    let got = f.f.readBytes(seen, 0, seen.len)
    if got != seen.len:
      setError f, ioFailure
    elif seen != cookie:
      setError f, wrongHeader
  proc storeSection*(f: var RodFile; s: RodSection) =
    ## Makes `s` the current section of `f` and writes the raw value of `s`
    ## with `storePrim`.
    ##
    ## Does nothing if `f` is already in an error state. Asserts that `s`
    ## comes strictly after the current section.
    if f.err != ok: return
    assert s > f.currentSection
    f.currentSection = s
    f.storePrim(s)
  proc loadSection*(f: var RodFile; expected: RodSection) =
    ## Reads a section tag with `loadPrim` and sets `wrongSection` if it is
    ## not `expected`.
    ##
    ## Does nothing if `f` is already in an error state. An error set while
    ## reading the tag is kept as is. `f.currentSection` is not modified.
    if f.err != ok: return
    var found = default(RodSection)
    loadPrim(f, found)
    if f.err == ok and found != expected:
      setError f, wrongSection
  proc create*(filename: string): RodFile =
    ## Opens `filename` with `fmWrite` and returns the resulting `RodFile`.
    ## If the file cannot be opened, the result's error is `cannotOpen`.
    result = default(RodFile)
    let opened = open(result.f, filename, fmWrite)
    if not opened:
      setError result, cannotOpen
  proc close*(f: var RodFile) =
    ## Closes the underlying file handle of `f`.
    f.f.close()
  proc open*(filename: string): RodFile =
    ## Opens `filename` with `fmRead` and returns the resulting `RodFile`.
    ## If the file cannot be opened, the result's error is `cannotOpen`.
    result = default(RodFile)
    let opened = open(result.f, filename, fmRead)
    if not opened:
      setError result, cannotOpen