subexes.nim 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Nim support for `substitution expressions`:idx: (`subex`:idx:).
  10. ##
  11. ## .. include:: ../../doc/subexes.txt
  12. ##
  13. {.push debugger:off .} # the user does not want to trace a part
  14. # of the standard library!
  15. from strutils import parseInt, cmpIgnoreStyle, Digits
  16. include "system/inclrtl"
  17. import system/helpers2
  18. proc findNormalized(x: string, inArray: openarray[string]): int =
  19. var i = 0
  20. while i < high(inArray):
  21. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  22. inc(i, 2) # incrementing by 1 would probably lead to a
  23. # security hole...
  24. return -1
  25. type
  26. SubexError* = object of ValueError ## exception that is raised for
  27. ## an invalid subex
  28. proc raiseInvalidFormat(msg: string) {.noinline.} =
  29. raise newException(SubexError, "invalid format string: " & msg)
  30. type
  31. FormatParser = object {.pure, final.}
  32. when defined(js):
  33. f: string # we rely on the '\0' terminator
  34. # which JS's native string doesn't have
  35. else:
  36. f: cstring
  37. num, i, lineLen: int
  38. template call(x: untyped): untyped =
  39. p.i = i
  40. x
  41. i = p.i
  42. template callNoLineLenTracking(x: untyped): untyped =
  43. let oldLineLen = p.lineLen
  44. p.i = i
  45. x
  46. i = p.i
  47. p.lineLen = oldLineLen
  48. proc getFormatArg(p: var FormatParser, a: openArray[string]): int =
  49. const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  50. var i = p.i
  51. var f = p.f
  52. case f[i]
  53. of '#':
  54. result = p.num
  55. inc i
  56. inc p.num
  57. of '1'..'9', '-':
  58. var j = 0
  59. var negative = f[i] == '-'
  60. if negative: inc i
  61. while f[i] in Digits:
  62. j = j * 10 + ord(f[i]) - ord('0')
  63. inc i
  64. result = if not negative: j-1 else: a.len-j
  65. of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
  66. var name = ""
  67. while f[i] in PatternChars:
  68. name.add(f[i])
  69. inc(i)
  70. result = findNormalized(name, a)+1
  71. of '$':
  72. inc(i)
  73. call:
  74. result = getFormatArg(p, a)
  75. result = parseInt(a[result])-1
  76. else:
  77. raiseInvalidFormat("'#', '$', number or identifier expected")
  78. if result >=% a.len: raiseInvalidFormat(formatErrorIndexBound(result, a.len))
  79. p.i = i
  80. proc scanDollar(p: var FormatParser, a: openarray[string], s: var string) {.
  81. noSideEffect.}
  82. proc emitChar(p: var FormatParser, x: var string, ch: char) {.inline.} =
  83. x.add(ch)
  84. if ch == '\L': p.lineLen = 0
  85. else: inc p.lineLen
  86. proc emitStrLinear(p: var FormatParser, x: var string, y: string) {.inline.} =
  87. for ch in items(y): emitChar(p, x, ch)
  88. proc emitStr(p: var FormatParser, x: var string, y: string) {.inline.} =
  89. x.add(y)
  90. inc p.lineLen, y.len
  91. proc scanQuote(p: var FormatParser, x: var string, toAdd: bool) =
  92. var i = p.i+1
  93. var f = p.f
  94. while true:
  95. if f[i] == '\'':
  96. inc i
  97. if f[i] != '\'': break
  98. inc i
  99. if toAdd: emitChar(p, x, '\'')
  100. elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected")
  101. else:
  102. if toAdd: emitChar(p, x, f[i])
  103. inc i
  104. p.i = i
  105. proc scanBranch(p: var FormatParser, a: openArray[string],
  106. x: var string, choice: int) =
  107. var i = p.i
  108. var f = p.f
  109. var c = 0
  110. var elsePart = i
  111. var toAdd = choice == 0
  112. while true:
  113. case f[i]
  114. of ']': break
  115. of '|':
  116. inc i
  117. elsePart = i
  118. inc c
  119. if toAdd: break
  120. toAdd = choice == c
  121. of '\'':
  122. call: scanQuote(p, x, toAdd)
  123. of '\0': raiseInvalidFormat("closing ']' expected")
  124. else:
  125. if toAdd:
  126. if f[i] == '$':
  127. inc i
  128. call: scanDollar(p, a, x)
  129. else:
  130. emitChar(p, x, f[i])
  131. inc i
  132. else:
  133. inc i
  134. if not toAdd and choice >= 0:
  135. # evaluate 'else' part:
  136. var last = i
  137. i = elsePart
  138. while true:
  139. case f[i]
  140. of '|', ']': break
  141. of '\'':
  142. call: scanQuote(p, x, true)
  143. of '$':
  144. inc i
  145. call: scanDollar(p, a, x)
  146. else:
  147. emitChar(p, x, f[i])
  148. inc i
  149. i = last
  150. p.i = i+1
  151. proc scanSlice(p: var FormatParser, a: openarray[string]): tuple[x, y: int] =
  152. var slice = false
  153. var i = p.i
  154. var f = p.f
  155. if f[i] == '{': inc i
  156. else: raiseInvalidFormat("'{' expected")
  157. if f[i] == '.' and f[i+1] == '.':
  158. inc i, 2
  159. slice = true
  160. else:
  161. call: result.x = getFormatArg(p, a)
  162. if f[i] == '.' and f[i+1] == '.':
  163. inc i, 2
  164. slice = true
  165. if slice:
  166. if f[i] != '}':
  167. call: result.y = getFormatArg(p, a)
  168. else:
  169. result.y = high(a)
  170. else:
  171. result.y = result.x
  172. if f[i] != '}': raiseInvalidFormat("'}' expected")
  173. inc i
  174. p.i = i
  175. proc scanDollar(p: var FormatParser, a: openarray[string], s: var string) =
  176. var i = p.i
  177. var f = p.f
  178. case f[i]
  179. of '$':
  180. emitChar p, s, '$'
  181. inc i
  182. of '*':
  183. for j in 0..a.high: emitStr p, s, a[j]
  184. inc i
  185. of '{':
  186. call:
  187. let (x, y) = scanSlice(p, a)
  188. for j in x..y: emitStr p, s, a[j]
  189. of '[':
  190. inc i
  191. var start = i
  192. call: scanBranch(p, a, s, -1)
  193. var x: int
  194. if f[i] == '{':
  195. inc i
  196. call: x = getFormatArg(p, a)
  197. if f[i] != '}': raiseInvalidFormat("'}' expected")
  198. inc i
  199. else:
  200. call: x = getFormatArg(p, a)
  201. var last = i
  202. let choice = parseInt(a[x])
  203. i = start
  204. call: scanBranch(p, a, s, choice)
  205. i = last
  206. of '\'':
  207. var sep = ""
  208. callNoLineLenTracking: scanQuote(p, sep, true)
  209. if f[i] == '~':
  210. # $' '~{1..3}
  211. # insert space followed by 1..3 if not empty
  212. inc i
  213. call:
  214. let (x, y) = scanSlice(p, a)
  215. var L = 0
  216. for j in x..y: inc L, a[j].len
  217. if L > 0:
  218. emitStrLinear p, s, sep
  219. for j in x..y: emitStr p, s, a[j]
  220. else:
  221. block StringJoin:
  222. block OptionalLineLengthSpecifier:
  223. var maxLen = 0
  224. case f[i]
  225. of '0'..'9':
  226. while f[i] in Digits:
  227. maxLen = maxLen * 10 + ord(f[i]) - ord('0')
  228. inc i
  229. of '$':
  230. # do not skip the '$' here for `getFormatArg`!
  231. call:
  232. maxLen = getFormatArg(p, a)
  233. else: break OptionalLineLengthSpecifier
  234. var indent = ""
  235. case f[i]
  236. of 'i':
  237. inc i
  238. callNoLineLenTracking: scanQuote(p, indent, true)
  239. call:
  240. let (x, y) = scanSlice(p, a)
  241. if maxLen < 1: emitStrLinear(p, s, indent)
  242. var items = 1
  243. emitStr p, s, a[x]
  244. for j in x+1..y:
  245. emitStr p, s, sep
  246. if items >= maxLen:
  247. emitStrLinear p, s, indent
  248. items = 0
  249. emitStr p, s, a[j]
  250. inc items
  251. of 'c':
  252. inc i
  253. callNoLineLenTracking: scanQuote(p, indent, true)
  254. call:
  255. let (x, y) = scanSlice(p, a)
  256. if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent)
  257. emitStr p, s, a[x]
  258. for j in x+1..y:
  259. emitStr p, s, sep
  260. if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent)
  261. emitStr p, s, a[j]
  262. else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected")
  263. break StringJoin
  264. call:
  265. let (x, y) = scanSlice(p, a)
  266. emitStr p, s, a[x]
  267. for j in x+1..y:
  268. emitStr p, s, sep
  269. emitStr p, s, a[j]
  270. else:
  271. call:
  272. var x = getFormatArg(p, a)
  273. emitStr p, s, a[x]
  274. p.i = i
  275. type
  276. Subex* = distinct string ## string that contains a substitution expression
  277. proc subex*(s: string): Subex =
  278. ## constructs a *substitution expression* from `s`. Currently this performs
  279. ## no syntax checking but this may change in later versions.
  280. result = Subex(s)
  281. proc addf*(s: var string, formatstr: Subex, a: varargs[string, `$`]) {.
  282. noSideEffect, rtl, extern: "nfrmtAddf".} =
  283. ## The same as ``add(s, formatstr % a)``, but more efficient.
  284. var p: FormatParser
  285. p.f = formatstr.string
  286. var i = 0
  287. while i < len(formatstr.string):
  288. if p.f[i] == '$':
  289. inc i
  290. call: scanDollar(p, a, s)
  291. else:
  292. emitChar(p, s, p.f[i])
  293. inc(i)
  294. proc `%` *(formatstr: Subex, a: openarray[string]): string {.noSideEffect,
  295. rtl, extern: "nfrmtFormatOpenArray".} =
  296. ## The `substitution`:idx: operator performs string substitutions in
  297. ## `formatstr` and returns a modified `formatstr`. This is often called
  298. ## `string interpolation`:idx:.
  299. ##
  300. result = newStringOfCap(formatstr.string.len + a.len shl 4)
  301. addf(result, formatstr, a)
  302. proc `%` *(formatstr: Subex, a: string): string {.noSideEffect,
  303. rtl, extern: "nfrmtFormatSingleElem".} =
  304. ## This is the same as ``formatstr % [a]``.
  305. result = newStringOfCap(formatstr.string.len + a.len)
  306. addf(result, formatstr, [a])
  307. proc format*(formatstr: Subex, a: varargs[string, `$`]): string {.noSideEffect,
  308. rtl, extern: "nfrmtFormatVarargs".} =
  309. ## The `substitution`:idx: operator performs string substitutions in
  310. ## `formatstr` and returns a modified `formatstr`. This is often called
  311. ## `string interpolation`:idx:.
  312. ##
  313. result = newStringOfCap(formatstr.string.len + a.len shl 4)
  314. addf(result, formatstr, a)
  315. {.pop.}
  # Self-test, run only when this module is compiled as the main module.
  # NOTE(review): this listing appears to have lost its indentation and any
  # runs of spaces; the expected values below (in particular the
  # triple-quoted ones passed through `unindent`) depend on exact
  # whitespace -- confirm against a pristine copy before relying on them.
  316. when isMainModule:
  317. from strutils import replace
  # Local `%` overloads so that plain string literals can be used as subexes.
  318. proc `%`(formatstr: string, a: openarray[string]): string =
  319. result = newStringOfCap(formatstr.len + a.len shl 4)
  320. addf(result, formatstr.Subex, a)
  321. proc `%`(formatstr: string, a: string): string =
  322. result = newStringOfCap(formatstr.len + a.len)
  323. addf(result, formatstr.Subex, [a])
  # `$#` / `$N` positional selectors and `$name` lookups
  324. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  325. doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
  326. "The cat eats fish."
  # `$[...|...]` alternatives selected by the integer value of an argument
  327. doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c"
  328. doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c"
  329. doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c"
  330. doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c"
  331. doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c"
  # `$'sep'{slice}` joins, `~` conditional separator, nested `$` selectors
  332. doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)"
  333. doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3"
  334. doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)"
  335. doAssert "$' '~{..}" % "" == ""
  336. doAssert "$' '~{..}" % "P0" == " P0"
  337. doAssert "${$1}" % "1" == "1"
  338. doAssert "${$$-1} $$1" % "1" == "1 $1"
  # joins with a line length specifier (`Nc` = characters, `Ni` = items)
  339. doAssert(("$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"]).replace(" \n", "\n") ==
  340. """doAssert(
  341. longishA,
  342. longish)""")
  343. doAssert(("type MyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA",
  344. "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"]).replace(" \n", "\n") ==
  345. strutils.unindent("""
  346. type MyEnum* = enum
  347. fieldA, fieldB,
  348. FiledClkad, fieldD,
  349. fieldE, longishFieldName""", 6))
  # the same features through the exported `subex` constructor and `%`
  350. doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
  351. doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
  352. doAssert subex"$['''|'|''''|']']#" % "0" == "'|"
  353. doAssert((subex("type\n Enum = enum\n $', '40c'\n '{..}") % [
  354. "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]).replace(" \n", "\n") ==
  355. strutils.unindent("""
  356. type
  357. Enum = enum
  358. fieldNameA, fieldNameB, fieldNameC,
  359. fieldNameD""", 6))