subexes.nim 11 KB

#
#
# Nim's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#

## Nim support for `substitution expressions`:idx: (`subex`:idx:).
##
## .. include:: ../../doc/subexes.txt
##
  13. {.push debugger:off .} # the user does not want to trace a part
  14. # of the standard library!
  15. from strutils import parseInt, cmpIgnoreStyle, Digits
  16. include "system/inclrtl"
  17. proc findNormalized(x: string, inArray: openarray[string]): int =
  18. var i = 0
  19. while i < high(inArray):
  20. if cmpIgnoreStyle(x, inArray[i]) == 0: return i
  21. inc(i, 2) # incrementing by 1 would probably lead to a
  22. # security hole...
  23. return -1
type
  SubexError* = object of ValueError ## exception that is raised for
                                     ## an invalid subex

# `EInvalidSubex` is kept as a deprecated alias of `SubexError`.
{.deprecated: [EInvalidSubex: SubexError].}
  28. proc raiseInvalidFormat(msg: string) {.noinline.} =
  29. raise newException(SubexError, "invalid format string: " & msg)
type
  # Parser state that is threaded through all `scan*` / `emit*` procs.
  FormatParser = object {.pure, final.}
    when defined(js):
      f: string # we rely on the '\0' terminator
                # which JS's native string doesn't have
    else:
      f: cstring
    # num:     index handed out by the next '#' selector (see getFormatArg)
    # i:       current read position within `f`
    # lineLen: characters emitted since the last '\L' (see emitChar)
    num, i, lineLen: int

# `TFormatParser` is kept as a deprecated alias of `FormatParser`.
{.deprecated: [TFormatParser: FormatParser].}
template call(x: untyped): untyped =
  # Runs `x` with the caller's local cursor `i` written back to `p.i` first,
  # and reloads `i` from `p.i` afterwards. `p` and `i` are not parameters of
  # this template; they must be in scope where the template is used.
  p.i = i
  x
  i = p.i
template callNoLineLenTracking(x: untyped): untyped =
  # Like `call`, but additionally saves `p.lineLen` before running `x` and
  # restores it afterwards, so whatever `x` emits does not affect the
  # tracked line length. Relies on `p` and `i` from the surrounding scope.
  let oldLineLen = p.lineLen
  p.i = i
  x
  i = p.i
  p.lineLen = oldLineLen
  49. proc getFormatArg(p: var FormatParser, a: openArray[string]): int =
  50. const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
  51. var i = p.i
  52. var f = p.f
  53. case f[i]
  54. of '#':
  55. result = p.num
  56. inc i
  57. inc p.num
  58. of '1'..'9', '-':
  59. var j = 0
  60. var negative = f[i] == '-'
  61. if negative: inc i
  62. while f[i] in Digits:
  63. j = j * 10 + ord(f[i]) - ord('0')
  64. inc i
  65. result = if not negative: j-1 else: a.len-j
  66. of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
  67. var name = ""
  68. while f[i] in PatternChars:
  69. name.add(f[i])
  70. inc(i)
  71. result = findNormalized(name, a)+1
  72. of '$':
  73. inc(i)
  74. call:
  75. result = getFormatArg(p, a)
  76. result = parseInt(a[result])-1
  77. else:
  78. raiseInvalidFormat("'#', '$', number or identifier expected")
  79. if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result)
  80. p.i = i
# Forward declaration: `scanBranch` calls `scanDollar`, whose body is only
# defined further down in this file.
proc scanDollar(p: var FormatParser, a: openarray[string], s: var string) {.
  noSideEffect.}
  83. proc emitChar(p: var FormatParser, x: var string, ch: char) {.inline.} =
  84. x.add(ch)
  85. if ch == '\L': p.lineLen = 0
  86. else: inc p.lineLen
  87. proc emitStrLinear(p: var FormatParser, x: var string, y: string) {.inline.} =
  88. for ch in items(y): emitChar(p, x, ch)
  89. proc emitStr(p: var FormatParser, x: var string, y: string) {.inline.} =
  90. x.add(y)
  91. inc p.lineLen, y.len
  92. proc scanQuote(p: var FormatParser, x: var string, toAdd: bool) =
  93. var i = p.i+1
  94. var f = p.f
  95. while true:
  96. if f[i] == '\'':
  97. inc i
  98. if f[i] != '\'': break
  99. inc i
  100. if toAdd: emitChar(p, x, '\'')
  101. elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected")
  102. else:
  103. if toAdd: emitChar(p, x, f[i])
  104. inc i
  105. p.i = i
proc scanBranch(p: var FormatParser, a: openArray[string],
                x: var string, choice: int) =
  ## Scans a list of alternatives separated by '|' and closed by ']',
  ## starting at `p.i`. The alternative whose 0-based position equals
  ## `choice` is emitted into `x`. If no alternative matched and
  ## `choice >= 0`, the text after the last '|' seen is emitted instead.
  ## With a negative `choice` nothing is emitted; the input is only scanned.
  ##
  ## Raises `SubexError` if '\0' is reached before the closing ']'.
  var i = p.i
  var f = p.f
  var c = 0             # position of the alternative currently scanned
  var elsePart = i      # start of the most recently entered alternative
  var toAdd = choice == 0
  while true:
    case f[i]
    of ']': break
    of '|':
      inc i
      elsePart = i
      inc c
      # the selected alternative has just ended: stop scanning
      if toAdd: break
      toAdd = choice == c
    of '\'':
      # quoted text is always scanned, but only emitted when selected
      call: scanQuote(p, x, toAdd)
    of '\0': raiseInvalidFormat("closing ']' expected")
    else:
      if toAdd:
        if f[i] == '$':
          inc i
          call: scanDollar(p, a, x)
        else:
          emitChar(p, x, f[i])
          inc i
      else:
        inc i
  if not toAdd and choice >= 0:
    # evaluate 'else' part:
    var last = i
    i = elsePart
    while true:
      case f[i]
      of '|', ']': break
      of '\'':
        call: scanQuote(p, x, true)
      of '$':
        inc i
        call: scanDollar(p, a, x)
      else:
        emitChar(p, x, f[i])
        inc i
    # continue from where the first loop stopped
    i = last
  p.i = i+1
  152. proc scanSlice(p: var FormatParser, a: openarray[string]): tuple[x, y: int] =
  153. var slice = false
  154. var i = p.i
  155. var f = p.f
  156. if f[i] == '{': inc i
  157. else: raiseInvalidFormat("'{' expected")
  158. if f[i] == '.' and f[i+1] == '.':
  159. inc i, 2
  160. slice = true
  161. else:
  162. call: result.x = getFormatArg(p, a)
  163. if f[i] == '.' and f[i+1] == '.':
  164. inc i, 2
  165. slice = true
  166. if slice:
  167. if f[i] != '}':
  168. call: result.y = getFormatArg(p, a)
  169. else:
  170. result.y = high(a)
  171. else:
  172. result.y = result.x
  173. if f[i] != '}': raiseInvalidFormat("'}' expected")
  174. inc i
  175. p.i = i
proc scanDollar(p: var FormatParser, a: openarray[string], s: var string) =
  ## Handles the construct that follows a '$' in the format string (the '$'
  ## itself has already been skipped by the caller) and appends the
  ## resulting text to `s`. `p.i` is left after the construct.
  var i = p.i
  var f = p.f
  case f[i]
  of '$':
    # '$$' stands for a literal dollar sign
    emitChar p, s, '$'
    inc i
  of '*':
    # '$*' emits every element of `a` in order
    for j in 0..a.high: emitStr p, s, a[j]
    inc i
  of '{':
    # '${slice}' emits the elements in the given index range
    call:
      let (x, y) = scanSlice(p, a)
    for j in x..y: emitStr p, s, a[j]
  of '[':
    inc i
    var start = i
    # first pass with choice -1: emits nothing, only skips past the ']'
    call: scanBranch(p, a, s, -1)
    var x: int
    # the selector after ']' may optionally be wrapped in braces
    if f[i] == '{':
      inc i
      call: x = getFormatArg(p, a)
      if f[i] != '}': raiseInvalidFormat("'}' expected")
      inc i
    else:
      call: x = getFormatArg(p, a)
    var last = i
    let choice = parseInt(a[x])
    # second pass: rewind to the first alternative and emit the chosen one,
    # then resume after the selector
    i = start
    call: scanBranch(p, a, s, choice)
    i = last
  of '\'':
    # a quoted separator; it is collected into `sep` without affecting the
    # tracked line length
    var sep = ""
    callNoLineLenTracking: scanQuote(p, sep, true)
    if f[i] == '~':
      # $' '~{1..3}
      # insert space followed by 1..3 if not empty
      inc i
      call:
        let (x, y) = scanSlice(p, a)
      var L = 0
      for j in x..y: inc L, a[j].len
      if L > 0:
        emitStrLinear p, s, sep
        for j in x..y: emitStr p, s, a[j]
    else:
      block StringJoin:
        block OptionalLineLengthSpecifier:
          # the specifier is either a decimal number or a '$' selector;
          # anything else means a plain join (handled after this block)
          var maxLen = 0
          case f[i]
          of '0'..'9':
            while f[i] in Digits:
              maxLen = maxLen * 10 + ord(f[i]) - ord('0')
              inc i
          of '$':
            # do not skip the '$' here for `getFormatArg`!
            call:
              maxLen = getFormatArg(p, a)
          else: break OptionalLineLengthSpecifier
          var indent = ""
          case f[i]
          of 'i':
            # limit counted in items: `indent` is emitted once `maxLen`
            # items have been written since the last break
            inc i
            callNoLineLenTracking: scanQuote(p, indent, true)
            call:
              let (x, y) = scanSlice(p, a)
            if maxLen < 1: emitStrLinear(p, s, indent)
            var items = 1
            emitStr p, s, a[x]
            for j in x+1..y:
              emitStr p, s, sep
              if items >= maxLen:
                emitStrLinear p, s, indent
                items = 0
              emitStr p, s, a[j]
              inc items
          of 'c':
            # limit counted in characters: `indent` is emitted whenever the
            # next element would push `p.lineLen` beyond `maxLen`
            inc i
            callNoLineLenTracking: scanQuote(p, indent, true)
            call:
              let (x, y) = scanSlice(p, a)
            if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent)
            emitStr p, s, a[x]
            for j in x+1..y:
              emitStr p, s, sep
              if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent)
              emitStr p, s, a[j]
          else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected")
          break StringJoin
        # plain join: the elements of the slice separated by `sep`
        call:
          let (x, y) = scanSlice(p, a)
        emitStr p, s, a[x]
        for j in x+1..y:
          emitStr p, s, sep
          emitStr p, s, a[j]
  else:
    # a bare selector: emit the single selected element
    call:
      var x = getFormatArg(p, a)
    emitStr p, s, a[x]
  p.i = i
type
  Subex* = distinct string ## string that contains a substitution expression

# `TSubex` is kept as a deprecated alias of `Subex`.
{.deprecated: [TSubex: Subex].}
  279. proc subex*(s: string): Subex =
  280. ## constructs a *substitution expression* from `s`. Currently this performs
  281. ## no syntax checking but this may change in later versions.
  282. result = Subex(s)
  283. proc addf*(s: var string, formatstr: Subex, a: varargs[string, `$`]) {.
  284. noSideEffect, rtl, extern: "nfrmtAddf".} =
  285. ## The same as ``add(s, formatstr % a)``, but more efficient.
  286. var p: FormatParser
  287. p.f = formatstr.string
  288. var i = 0
  289. while i < len(formatstr.string):
  290. if p.f[i] == '$':
  291. inc i
  292. call: scanDollar(p, a, s)
  293. else:
  294. emitChar(p, s, p.f[i])
  295. inc(i)
  296. proc `%` *(formatstr: Subex, a: openarray[string]): string {.noSideEffect,
  297. rtl, extern: "nfrmtFormatOpenArray".} =
  298. ## The `substitution`:idx: operator performs string substitutions in
  299. ## `formatstr` and returns a modified `formatstr`. This is often called
  300. ## `string interpolation`:idx:.
  301. ##
  302. result = newStringOfCap(formatstr.string.len + a.len shl 4)
  303. addf(result, formatstr, a)
  304. proc `%` *(formatstr: Subex, a: string): string {.noSideEffect,
  305. rtl, extern: "nfrmtFormatSingleElem".} =
  306. ## This is the same as ``formatstr % [a]``.
  307. result = newStringOfCap(formatstr.string.len + a.len)
  308. addf(result, formatstr, [a])
  309. proc format*(formatstr: Subex, a: varargs[string, `$`]): string {.noSideEffect,
  310. rtl, extern: "nfrmtFormatVarargs".} =
  311. ## The `substitution`:idx: operator performs string substitutions in
  312. ## `formatstr` and returns a modified `formatstr`. This is often called
  313. ## `string interpolation`:idx:.
  314. ##
  315. result = newStringOfCap(formatstr.string.len + a.len shl 4)
  316. addf(result, formatstr, a)
  317. {.pop.}
when isMainModule:
  # Self-test: these assertions are only compiled when this module is the
  # main module.
  # NOTE(review): leading whitespace inside the string literals below
  # (especially the triple-quoted ones) may have been lost when this file
  # was extracted; verify the expected values against the upstream file
  # before relying on these assertions.
  from strutils import replace

  # Local convenience overloads so that plain strings can be used as format
  # strings in the assertions.
  proc `%`(formatstr: string, a: openarray[string]): string =
    result = newStringOfCap(formatstr.len + a.len shl 4)
    addf(result, formatstr.Subex, a)
  proc `%`(formatstr: string, a: string): string =
    result = newStringOfCap(formatstr.len + a.len)
    addf(result, formatstr.Subex, [a])

  # positional, numbered and named arguments
  doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
  doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
    "The cat eats fish."
  # branches: '$[a|b|...]selector'
  doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c"
  doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c"
  doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c"
  doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c"
  doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c"
  # joins with a quoted separator, also nested inside branches
  doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)"
  doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3"
  doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)"
  # '~': separator is only emitted when the slice is not empty
  doAssert "$' '~{..}" % "" == ""
  doAssert "$' '~{..}" % "P0" == " P0"
  # indirect selectors and escaped dollar signs
  doAssert "${$1}" % "1" == "1"
  doAssert "${$$-1} $$1" % "1" == "1 $1"
  # joins with a line length limit ('c' = chars, 'i' = items)
  doAssert(("$#($', '10c'\n '{#..})" % ["doAssert", "longishA", "longish"]).replace(" \n", "\n") ==
    """doAssert(
longishA,
longish)""")
  doAssert(("type MyEnum* = enum\n $', '2i'\n '{..}" % ["fieldA",
    "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"]).replace(" \n", "\n") ==
    strutils.unindent("""
type MyEnum* = enum
fieldA, fieldB,
FiledClkad, fieldD,
fieldE, longishFieldName""", 6))
  # the same operators on values constructed with `subex`
  doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
  doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
  doAssert subex"$['''|'|''''|']']#" % "0" == "'|"
  doAssert((subex("type\n Enum = enum\n $', '40c'\n '{..}") % [
    "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]).replace(" \n", "\n") ==
    strutils.unindent("""
type
Enum = enum
fieldNameA, fieldNameB, fieldNameC,
fieldNameD""", 6))
  359. Enum = enum
  360. fieldNameA, fieldNameB, fieldNameC,
  361. fieldNameD""", 6))