subexes.nim 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## Nim support for `substitution expressions`:idx: (`subex`:idx:).
  10. ##
  11. ## .. include:: ../../doc/subexes.txt
  12. ##
  13. {.push debugger:off .} # the user does not want to trace a part
  14. # of the standard library!
  15. from strutils import parseInt, cmpIgnoreStyle, Digits
  16. include "system/inclrtl"
  proc findNormalized(x: string, inArray: openarray[string]): int =
    ## Returns the index of the first even-positioned entry of `inArray` that
    ## equals `x` under `cmpIgnoreStyle`, or -1 when there is none.
    ##
    ## Only slots 0, 2, 4, ... are compared and the last slot is never a
    ## candidate, so a hit at index `k` always has a successor at `k+1`.
    # Stepping by 2 is deliberate: stepping by 1 would also compare the
    # odd-positioned entries.
    for keyIdx in countup(0, high(inArray) - 1, 2):
      if cmpIgnoreStyle(x, inArray[keyIdx]) == 0:
        return keyIdx
    result = -1
  type
    SubexError* = object of ValueError ## raised when a subex cannot be
                                       ## parsed or evaluated

  proc raiseInvalidFormat(msg: string) {.noinline.} =
    ## Raises `SubexError` whose message is `msg` prefixed with
    ## "invalid format string: ". Never returns normally.
    let fullMsg = "invalid format string: " & msg
    raise newException(SubexError, fullMsg)
  # Parser state shared by all scanning procs in this module:
  #   f       - the format string being scanned
  #   i       - current position inside `f`
  #   num     - next argument index handed out by the `#` reference
  #   lineLen - number of characters emitted since the last '\L'
  # The pragmas are attached to the type name; writing them after the
  # `object` keyword is a deprecated placement.
  type
    FormatParser {.pure, final.} = object
      when defined(js):
        f: string # we rely on the '\0' terminator
                  # which JS's native string doesn't have
      else:
        f: cstring
      num, i, lineLen: int
  # Runs `body` with the parser position synchronised: the caller's local
  # `i` is stored into `p.i` first, and read back from `p.i` afterwards.
  # `p` and `i` are not parameters; they bind to whatever is in scope at
  # the call site.
  template call(body: untyped): untyped =
    p.i = i
    body
    i = p.i
  # Like `call` (syncs the caller's `i` with `p.i` around `body`), but also
  # restores `p.lineLen` to the value it had before `body` ran, so whatever
  # `body` emits does not count towards the current line length.
  template callNoLineLenTracking(body: untyped): untyped =
    let savedLineLen = p.lineLen
    p.i = i
    body
    i = p.i
    p.lineLen = savedLineLen
  proc getFormatArg(p: var FormatParser, a: openArray[string]): int =
    ## Parses one argument reference at `p.f[p.i]` and returns the zero-based
    ## index into `a` that it denotes; `p.i` is advanced past the reference.
    ##
    ## Accepted forms:
    ## * `#`    - the running counter `p.num`, which is then incremented
    ## * `N`    - 1-based position from the start (`1` is `a[0]`)
    ## * `-N`   - position counted from the end (`-1` is the last element)
    ## * `name` - looked up with `findNormalized`; the result is the index
    ##            directly after the matching entry
    ## * `$ref` - resolves `ref`, parses that argument as an integer and
    ##            uses it as a 1-based position
    ##
    ## Raises `SubexError` for a malformed reference, an unknown name or an
    ## index outside `a`. `parseInt` is called on the argument text in the
    ## `$ref` form and reports non-numeric text with its own error.
    const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
    var i = p.i
    let f = p.f
    case f[i]
    of '#':
      result = p.num
      inc i
      inc p.num
    of '1'..'9', '-':
      var j = 0
      let negative = f[i] == '-'
      if negative: inc i
      while f[i] in Digits:
        j = j * 10 + ord(f[i]) - ord('0')
        inc i
      result = if not negative: j-1 else: a.len-j
    of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
      var name = ""
      while f[i] in PatternChars:
        name.add(f[i])
        inc(i)
      let keyIdx = findNormalized(name, a)
      # Without this check the -1 "not found" result would become index 0
      # after the +1 below and silently substitute the first argument.
      if keyIdx < 0:
        raiseInvalidFormat("unknown argument name: " & name)
      result = keyIdx+1
    of '$':
      inc(i)
      call:
        result = getFormatArg(p, a)
      result = parseInt(a[result])-1
    else:
      raiseInvalidFormat("'#', '$', number or identifier expected")
    # `>=%` compares as unsigned, so negative indices are rejected as well.
    if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result)
    p.i = i
  # Forward declaration: `scanBranch` calls `scanDollar`, and `scanDollar`
  # in turn calls `scanBranch`.
  proc scanDollar(p: var FormatParser, a: openarray[string],
                  s: var string) {.noSideEffect.}
  proc emitChar(p: var FormatParser, x: var string, ch: char) {.inline.} =
    ## Appends `ch` to `x` and updates `p.lineLen`: a line feed resets it
    ## to 0, any other character increases it by one.
    x.add(ch)
    p.lineLen = if ch == '\L': 0 else: p.lineLen + 1
  proc emitStrLinear(p: var FormatParser, x: var string, y: string) {.inline.} =
    ## Appends `y` to `x` one character at a time through `emitChar`, so
    ## line feeds inside `y` reset `p.lineLen`.
    for c in y:
      p.emitChar(x, c)
  proc emitStr(p: var FormatParser, x: var string, y: string) {.inline.} =
    ## Appends `y` to `x` in one step and adds `y.len` to `p.lineLen`.
    ## Unlike `emitStrLinear`, the contents of `y` are not inspected.
    add(x, y)
    p.lineLen = p.lineLen + y.len
  proc scanQuote(p: var FormatParser, x: var string, toAdd: bool) =
    ## Scans a single-quoted literal. Scanning starts at `p.i + 1`, i.e.
    ## `p.i` is taken to be the opening quote. Two consecutive quotes inside
    ## the literal stand for one quote character. When `toAdd` is true the
    ## literal's characters are appended to `x` via `emitChar`; otherwise
    ## they are only skipped. On return `p.i` is the position right after
    ## the closing quote.
    ##
    ## Raises `SubexError` if the '\0' terminator is reached first.
    var pos = p.i + 1
    let fmt = p.f
    while true:
      case fmt[pos]
      of '\'':
        inc pos
        # a quote not followed by another quote closes the literal
        if fmt[pos] != '\'': break
        inc pos
        if toAdd: emitChar(p, x, '\'')
      of '\0':
        raiseInvalidFormat("closing \"'\" expected")
      else:
        if toAdd: emitChar(p, x, fmt[pos])
        inc pos
    p.i = pos
  proc scanBranch(p: var FormatParser, a: openArray[string],
                  x: var string, choice: int) =
    ## Scans the `|`-separated alternatives of a `[...]` group starting at
    ## `p.i` and appends to `x` the alternative whose zero-based position
    ## equals `choice`.
    ##
    ## * If `choice >= 0` matches no alternative, the text after the last
    ##   `|` seen (the whole group when there is no `|`) is emitted instead.
    ## * A negative `choice` emits nothing.
    ## * Quoted literals are handled by `scanQuote`; `$` inside an emitted
    ##   alternative is expanded by `scanDollar`.
    ##
    ## When the scan ends on the closing `]`, `p.i` ends up just past it.
    ## Raises `SubexError` if the '\0' terminator is reached first.
    # NOTE: `p` and `i` must keep these names; the `call` template uses them.
    var
      i = p.i
      branchIdx = 0
      elseStart = i
      emitting = (choice == 0)
    let f = p.f
    while true:
      case f[i]
      of ']': break
      of '|':
        inc i
        elseStart = i
        inc branchIdx
        # the selected alternative just ended: nothing more to emit
        if emitting: break
        emitting = (choice == branchIdx)
      of '\'':
        call: scanQuote(p, x, emitting)
      of '\0': raiseInvalidFormat("closing ']' expected")
      else:
        if not emitting:
          inc i
        elif f[i] == '$':
          inc i
          call: scanDollar(p, a, x)
        else:
          emitChar(p, x, f[i])
          inc i
    if not emitting and choice >= 0:
      # no alternative matched: emit the trailing 'else' alternative, then
      # restore the position reached by the first pass
      let resumeAt = i
      i = elseStart
      while true:
        case f[i]
        of '|', ']': break
        of '\'':
          call: scanQuote(p, x, true)
        of '$':
          inc i
          call: scanDollar(p, a, x)
        else:
          emitChar(p, x, f[i])
          inc i
      i = resumeAt
    p.i = i + 1
  proc scanSlice(p: var FormatParser,
                 a: openarray[string]): tuple[x, y: int] =
    ## Parses a braced argument selector at `p.i` and returns the inclusive
    ## index pair it denotes. Each bound is read with `getFormatArg`.
    ##
    ## * `{lo}`     - `(lo, lo)`
    ## * `{lo..hi}` - `(lo, hi)`
    ## * `{lo..}`   - `(lo, high(a))`
    ## * `{..hi}`   - `(0, hi)`
    ## * `{..}`     - `(0, high(a))`
    ##
    ## `p.i` is left just past the closing `}`. Raises `SubexError` when the
    ## opening or closing brace is missing.
    # NOTE: `p` and `i` must keep these names; the `call` template uses them.
    var i = p.i
    let f = p.f
    if f[i] != '{': raiseInvalidFormat("'{' expected")
    inc i
    var isRange = f[i] == '.' and f[i+1] == '.'
    if isRange:
      # lower bound omitted: result.x keeps its default of 0
      inc i, 2
    else:
      call: result.x = getFormatArg(p, a)
      if f[i] == '.' and f[i+1] == '.':
        inc i, 2
        isRange = true
    if not isRange:
      result.y = result.x
    elif f[i] == '}':
      result.y = high(a)
    else:
      call: result.y = getFormatArg(p, a)
    if f[i] != '}': raiseInvalidFormat("'}' expected")
    inc i
    p.i = i
  proc scanDollar(p: var FormatParser, a: openarray[string], s: var string) =
    ## Expands the construct that follows a `$` (the `$` itself has already
    ## been consumed; `p.i` is the next character) and appends the result to
    ## `s`. `p.i` is left after the construct.
    ##
    ## Handled forms, selected by the character at `p.i`:
    ## * `$`  - a literal dollar sign
    ## * `*`  - every element of `a`, concatenated
    ## * `{`  - the elements selected by a slice (see `scanSlice`)
    ## * `[`  - a branch group followed by a selector argument, either bare
    ##          or in braces; the selected argument is parsed as an integer
    ##          and passed to `scanBranch` as the choice
    ## * `'`  - a quoted separator, followed by either
    ##          `~{slice}` (separator, then the slice, only if the slice
    ##          elements have a total length > 0), or an optional line
    ##          limit (`<n>i'indent'` counts items, `<n>c'indent'` counts
    ##          characters) and a slice whose elements are joined with the
    ##          separator
    ## * anything else - a single argument reference (see `getFormatArg`)
    ##
    ## An empty slice in a join (`x > y`) emits nothing.
    ## Raises `SubexError` on malformed input.
    # NOTE: `p` and `i` must keep these names; the `call` template uses them.
    var i = p.i
    let f = p.f
    case f[i]
    of '$':
      emitChar p, s, '$'
      inc i
    of '*':
      for j in 0..a.high: emitStr p, s, a[j]
      inc i
    of '{':
      call:
        let (x, y) = scanSlice(p, a)
      for j in x..y: emitStr p, s, a[j]
    of '[':
      inc i
      let start = i
      # first pass with choice -1 emits nothing; it only locates the end of
      # the group so that the selector behind it can be read
      call: scanBranch(p, a, s, -1)
      var x: int
      if f[i] == '{':
        inc i
        call: x = getFormatArg(p, a)
        if f[i] != '}': raiseInvalidFormat("'}' expected")
        inc i
      else:
        call: x = getFormatArg(p, a)
      let last = i
      let choice = parseInt(a[x])
      # second pass over the same group, now emitting the chosen branch
      i = start
      call: scanBranch(p, a, s, choice)
      i = last
    of '\'':
      var sep = ""
      callNoLineLenTracking: scanQuote(p, sep, true)
      if f[i] == '~':
        # $' '~{1..3}
        # insert space followed by 1..3 if not empty
        inc i
        call:
          let (x, y) = scanSlice(p, a)
        var L = 0
        for j in x..y: inc L, a[j].len
        if L > 0:
          emitStrLinear p, s, sep
          for j in x..y: emitStr p, s, a[j]
      else:
        block StringJoin:
          block OptionalLineLengthSpecifier:
            var maxLen = 0
            case f[i]
            of '0'..'9':
              while f[i] in Digits:
                maxLen = maxLen * 10 + ord(f[i]) - ord('0')
                inc i
            of '$':
              # do not skip the '$' here for `getFormatArg`!
              call:
                maxLen = getFormatArg(p, a)
            else: break OptionalLineLengthSpecifier
            var indent = ""
            case f[i]
            of 'i':
              inc i
              callNoLineLenTracking: scanQuote(p, indent, true)
              call:
                let (x, y) = scanSlice(p, a)
              # guard: an empty slice has no first element to emit
              if x <= y:
                if maxLen < 1: emitStrLinear(p, s, indent)
                var items = 1
                emitStr p, s, a[x]
                for j in x+1..y:
                  emitStr p, s, sep
                  if items >= maxLen:
                    emitStrLinear p, s, indent
                    items = 0
                  emitStr p, s, a[j]
                  inc items
            of 'c':
              inc i
              callNoLineLenTracking: scanQuote(p, indent, true)
              call:
                let (x, y) = scanSlice(p, a)
              # guard: an empty slice has no first element to emit
              if x <= y:
                if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent)
                emitStr p, s, a[x]
                for j in x+1..y:
                  emitStr p, s, sep
                  if p.lineLen + a[j].len > maxLen:
                    emitStrLinear(p, s, indent)
                  emitStr p, s, a[j]
            else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected")
            break StringJoin
          # plain join without a line length specifier
          call:
            let (x, y) = scanSlice(p, a)
          # guard: an empty slice has no first element to emit
          if x <= y:
            emitStr p, s, a[x]
            for j in x+1..y:
              emitStr p, s, sep
              emitStr p, s, a[j]
    else:
      call:
        var x = getFormatArg(p, a)
      emitStr p, s, a[x]
    p.i = i
  type
    Subex* = distinct string ## a string holding a substitution expression

  {.deprecated: [TSubex: Subex].}

  proc subex*(s: string): Subex =
    ## Wraps `s` as a *substitution expression*. No syntax checking is done
    ## at present; later versions may add it.
    Subex(s)
  proc addf*(s: var string, formatstr: Subex, a: varargs[string, `$`]) {.
             noSideEffect, rtl, extern: "nfrmtAddf".} =
    ## Expands `formatstr` with the arguments `a` and appends the result to
    ## `s`. Gives the same text as ``add(s, formatstr % a)`` without building
    ## an intermediate string.
    # NOTE: `p` and `i` must keep these names; the `call` template uses them.
    var p: FormatParser
    p.f = formatstr.string
    let fmtLen = len(formatstr.string)
    var i = 0
    while i < fmtLen:
      if p.f[i] != '$':
        # ordinary character: copy it through
        emitChar(p, s, p.f[i])
        inc(i)
      else:
        # skip the '$' and let scanDollar expand what follows
        inc i
        call: scanDollar(p, a, s)
  proc `%` *(formatstr: Subex, a: openarray[string]): string {.noSideEffect,
    rtl, extern: "nfrmtFormatOpenArray".} =
    ## The `substitution`:idx: operator: returns a copy of `formatstr` in
    ## which every substitution expression has been replaced using the
    ## arguments in `a`. This is often called `string interpolation`:idx:.
    ##
    # capacity guess: the format text plus 16 bytes per argument
    let capacity = formatstr.string.len + (a.len shl 4)
    result = newStringOfCap(capacity)
    result.addf(formatstr, a)
  proc `%` *(formatstr: Subex, a: string): string {.noSideEffect,
    rtl, extern: "nfrmtFormatSingleElem".} =
    ## Single-argument convenience form; equivalent to ``formatstr % [a]``.
    # capacity guess: the format text plus the length of the one argument
    let capacity = formatstr.string.len + a.len
    result = newStringOfCap(capacity)
    result.addf(formatstr, [a])
  proc format*(formatstr: Subex, a: varargs[string, `$`]): string {.noSideEffect,
    rtl, extern: "nfrmtFormatVarargs".} =
    ## Varargs form of the `substitution`:idx: operator: returns a copy of
    ## `formatstr` in which every substitution expression has been replaced
    ## using the arguments `a` (each converted with `$`). This is often
    ## called `string interpolation`:idx:.
    ##
    # capacity guess: the format text plus 16 bytes per argument
    let capacity = formatstr.string.len + (a.len shl 4)
    result = newStringOfCap(capacity)
    result.addf(formatstr, a)
  315. {.pop.}
  # Self-test, compiled only when this file is the main module. It defines
  # string-typed `%` overloads that forward to `addf`, then asserts the
  # expected output for positional, named, branch, slice, separator/join and
  # line-wrapping substitutions.
  # NOTE(review): leading whitespace in this listing, including inside the
  # triple-quoted expected strings passed to `unindent(..., 6)`, looks
  # collapsed by extraction -- restore it from the upstream file before
  # relying on these assertions.
  316. when isMainModule:
  317. from strutils import replace
  318. proc `%`(formatstr: string, a: openarray[string]): string =
  319. result = newStringOfCap(formatstr.len + a.len shl 4)
  320. addf(result, formatstr.Subex, a)
  321. proc `%`(formatstr: string, a: string): string =
  322. result = newStringOfCap(formatstr.len + a.len)
  323. addf(result, formatstr.Subex, [a])
  324. doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  325. doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
  326. "The cat eats fish."
  327. doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c"
  328. doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c"
  329. doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c"
  330. doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c"
  331. doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c"
  332. doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)"
  333. doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3"
  334. doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)"
  335. doAssert "$' '~{..}" % "" == ""
  336. doAssert "$' '~{..}" % "P0" == " P0"
  337. doAssert "${$1}" % "1" == "1"
  338. doAssert "${$$-1} $$1" % "1" == "1 $1"
  339. doAssert(("$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"]).replace(" \n", "\n") ==
  340. """doAssert(
  341. longishA,
  342. longish)""")
  343. doAssert(("type MyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA",
  344. "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"]).replace(" \n", "\n") ==
  345. strutils.unindent("""
  346. type MyEnum* = enum
  347. fieldA, fieldB,
  348. FiledClkad, fieldD,
  349. fieldE, longishFieldName""", 6))
  350. doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
  351. doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
  352. doAssert subex"$['''|'|''''|']']#" % "0" == "'|"
  353. doAssert((subex("type\n Enum = enum\n $', '40c'\n '{..}") % [
  354. "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]).replace(" \n", "\n") ==
  355. strutils.unindent("""
  356. type
  357. Enum = enum
  358. fieldNameA, fieldNameB, fieldNameC,
  359. fieldNameD""", 6))