parseopt.nim 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2015 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module provides the standard Nim command line parser.
  10. ## It supports one convenience iterator over all command line options and some
  11. ## lower-level features.
  12. ##
  13. ## Supported Syntax
  14. ## ================
  15. ##
  16. ## The following syntax is supported when arguments for the ``shortNoVal`` and
  17. ## ``longNoVal`` parameters, which are
  18. ## `described later<#shortnoval-and-longnoval>`_, are not provided:
  19. ##
  20. ## 1. Short options: ``-abcd``, ``-e:5``, ``-e=5``
  21. ## 2. Long options: ``--foo:bar``, ``--foo=bar``, ``--foo``
  22. ## 3. Arguments: everything that does not start with a ``-``
  23. ##
  24. ## These three kinds of tokens are enumerated in the
  25. ## `CmdLineKind enum<#CmdLineKind>`_.
  26. ##
  27. ## When option values begin with ':' or '=', they need to be doubled up (as in
  28. ## ``--delim::``) or alternated (as in ``--delim=:``).
  29. ##
  30. ## The ``--`` option, commonly used to denote that every token that follows is
  31. ## an argument, is interpreted as a long option, and its name is the empty
  32. ## string.
  33. ##
  34. ## Parsing
  35. ## =======
  36. ##
  37. ## Use an `OptParser<#OptParser>`_ to parse command line options. It can be
  38. ## created with `initOptParser<#initOptParser,string,set[char],seq[string]>`_,
  39. ## and `next<#next,OptParser>`_ advances the parser by one token.
  40. ##
  41. ## For each token, the parser's ``kind``, ``key``, and ``val`` fields give
  42. ## information about that token. If the token is a long or short option, ``key``
  43. ## is the option's name, and ``val`` is either the option's value, if provided,
  44. ## or the empty string. For arguments, the ``key`` field contains the argument
  45. ## itself, and ``val`` is unused. To check if the end of the command line has
  46. ## been reached, check if ``kind`` is equal to ``cmdEnd``.
  47. ##
  48. ## Here is an example:
  49. ##
  50. ## .. code-block::
  51. ##   import parseopt
  52. ##
  53. ##   var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt")
  54. ##   while true:
  55. ##     p.next()
  56. ##     case p.kind
  57. ##     of cmdEnd: break
  58. ##     of cmdShortOption, cmdLongOption:
  59. ##       if p.val == "":
  60. ##         echo "Option: ", p.key
  61. ##       else:
  62. ##         echo "Option and value: ", p.key, ", ", p.val
  63. ##     of cmdArgument:
  64. ##       echo "Argument: ", p.key
  65. ##
  66. ##   # Output:
  67. ##   # Option: a
  68. ##   # Option: b
  69. ##   # Option and value: e, 5
  70. ##   # Option: foo
  71. ##   # Option and value: bar, 20
  72. ##   # Argument: file.txt
  73. ##
  74. ## The `getopt iterator<#getopt.i,OptParser>`_, which is provided for
  75. ## convenience, can be used to iterate through all command line options as well.
  76. ##
  77. ## ``shortNoVal`` and ``longNoVal``
  78. ## ================================
  79. ##
  80. ## The optional ``shortNoVal`` and ``longNoVal`` parameters present in
  81. ## `initOptParser<#initOptParser,string,set[char],seq[string]>`_ are for
  82. ## specifying which short and long options do not accept values.
  83. ##
  84. ## When ``shortNoVal`` is non-empty, users are not required to separate short
  85. ## options and their values with a ':' or '=' since the parser knows which
  86. ## options accept values and which ones do not. This behavior also applies for
  87. ## long options if ``longNoVal`` is non-empty. For short options, ``-j4``
  88. ## becomes supported syntax, and for long options, ``--foo bar`` becomes
  89. ## supported. This is in addition to the `previously mentioned
  90. ## syntax<#supported-syntax>`_. Users can still separate options and their
  91. ## values with ':' or '=', but that becomes optional.
  92. ##
  93. ## As more options which do not accept values are added to your program,
  94. ## remember to amend ``shortNoVal`` and ``longNoVal`` accordingly.
  95. ##
  96. ## The following example illustrates the difference between having an empty
  97. ## ``shortNoVal`` and ``longNoVal``, which is the default, and providing
  98. ## arguments for those two parameters:
  99. ##
  100. ## .. code-block::
  101. ##   import parseopt
  102. ##
  103. ##   proc printToken(kind: CmdLineKind, key: string, val: string) =
  104. ##     case kind
  105. ##     of cmdEnd: doAssert(false) # Doesn't happen with getopt()
  106. ##     of cmdShortOption, cmdLongOption:
  107. ##       if val == "":
  108. ##         echo "Option: ", key
  109. ##       else:
  110. ##         echo "Option and value: ", key, ", ", val
  111. ##     of cmdArgument:
  112. ##       echo "Argument: ", key
  113. ##
  114. ##   let cmdLine = "-j4 --first bar"
  115. ##
  116. ##   var emptyNoVal = initOptParser(cmdLine)
  117. ##   for kind, key, val in emptyNoVal.getopt():
  118. ##     printToken(kind, key, val)
  119. ##
  120. ##   # Output:
  121. ##   # Option: j
  122. ##   # Option: 4
  123. ##   # Option: first
  124. ##   # Argument: bar
  125. ##
  126. ##   var withNoVal = initOptParser(cmdLine, shortNoVal = {'c'},
  127. ##                                 longNoVal = @["second"])
  128. ##   for kind, key, val in withNoVal.getopt():
  129. ##     printToken(kind, key, val)
  130. ##
  131. ##   # Output:
  132. ##   # Option and value: j, 4
  133. ##   # Option and value: first, bar
  134. ##
  135. ## See also
  136. ## ========
  137. ##
  138. ## * `os module<os.html>`_ for lower-level command line parsing procs
  139. ## * `parseutils module<parseutils.html>`_ for helpers that parse tokens,
  140. ## numbers, identifiers, etc.
  141. ## * `strutils module<strutils.html>`_ for common string handling operations
  142. ## * `json module<json.html>`_ for a JSON parser
  143. ## * `parsecfg module<parsecfg.html>`_ for a configuration file parser
  144. ## * `parsecsv module<parsecsv.html>`_ for a simple CSV (comma separated value)
  145. ## parser
  146. ## * `parsexml module<parsexml.html>`_ for an XML / HTML parser
  147. ## * `other parsers<lib.html#pure-libraries-parsers>`_ for more parsers
  148. {.push debugger: off.}
  149. include "system/inclrtl"
  150. import
  151. os, strutils
  152. type
  153. CmdLineKind* = enum ## The detected command line token.
  154. cmdEnd, ## End of command line reached
  155. cmdArgument, ## An argument such as a filename
  156. cmdLongOption, ## A long option such as --option
  157. cmdShortOption ## A short option such as -c
  158. OptParser* = object of RootObj ## \
  159. ## Implementation of the command line parser.
  160. ##
  161. ## To initialize it, use the
  162. ## `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_.
  163. pos*: int
  164. inShortState: bool
  165. allowWhitespaceAfterColon: bool
  166. shortNoVal: set[char]
  167. longNoVal: seq[string]
  168. cmds: seq[string]
  169. idx: int
  170. kind*: CmdLineKind ## The detected command line token
  171. key*, val*: TaintedString ## Key and value pair; the key is the option
  172. ## or the argument, and the value is not "" if
  173. ## the option was given a value
  174. proc parseWord(s: string, i: int, w: var string,
  175. delim: set[char] = {'\t', ' '}): int =
  176. result = i
  177. if result < s.len and s[result] == '\"':
  178. inc(result)
  179. while result < s.len:
  180. if s[result] == '"':
  181. inc result
  182. break
  183. add(w, s[result])
  184. inc(result)
  185. else:
  186. while result < s.len and s[result] notin delim:
  187. add(w, s[result])
  188. inc(result)
  189. when declared(os.paramCount):
  190. # we cannot provide this for NimRtl creation on Posix, because we can't
  191. # access the command line arguments then!
  192. proc initOptParser*(cmdline = "", shortNoVal: set[char] = {},
  193. longNoVal: seq[string] = @[];
  194. allowWhitespaceAfterColon = true): OptParser =
  195. ## Initializes the command line parser.
  196. ##
  197. ## If ``cmdline == ""``, the real command line as provided by the
  198. ## ``os`` module is retrieved instead.
  199. ##
  200. ## ``shortNoVal`` and ``longNoVal`` are used to specify which options
  201. ## do not take values. See the `documentation about these
  202. ## parameters<#shortnoval-and-longnoval>`_ for more information on
  203. ## how this affects parsing.
  204. ##
  205. ## See also:
  206. ## * `getopt iterator<#getopt.i,OptParser>`_
  207. runnableExamples:
  208. var p = initOptParser()
  209. p = initOptParser("--left --debug:3 -l -r:2")
  210. p = initOptParser("--left --debug:3 -l -r:2",
  211. shortNoVal = {'l'}, longNoVal = @["left"])
  212. result.pos = 0
  213. result.idx = 0
  214. result.inShortState = false
  215. result.shortNoVal = shortNoVal
  216. result.longNoVal = longNoVal
  217. result.allowWhitespaceAfterColon = allowWhitespaceAfterColon
  218. if cmdline != "":
  219. result.cmds = parseCmdLine(cmdline)
  220. else:
  221. result.cmds = newSeq[string](os.paramCount())
  222. for i in countup(1, os.paramCount()):
  223. result.cmds[i-1] = os.paramStr(i).string
  224. result.kind = cmdEnd
  225. result.key = TaintedString""
  226. result.val = TaintedString""
  227. proc initOptParser*(cmdline: seq[TaintedString], shortNoVal: set[char] = {},
  228. longNoVal: seq[string] = @[];
  229. allowWhitespaceAfterColon = true): OptParser =
  230. ## Initializes the command line parser.
  231. ##
  232. ## If ``cmdline.len == 0``, the real command line as provided by the
  233. ## ``os`` module is retrieved instead. Behavior of the other parameters
  234. ## remains the same as in `initOptParser(string, ...)
  235. ## <#initOptParser,string,set[char],seq[string]>`_.
  236. ##
  237. ## See also:
  238. ## * `getopt iterator<#getopt.i,seq[TaintedString],set[char],seq[string]>`_
  239. runnableExamples:
  240. var p = initOptParser()
  241. p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"])
  242. p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"],
  243. shortNoVal = {'l'}, longNoVal = @["left"])
  244. result.pos = 0
  245. result.idx = 0
  246. result.inShortState = false
  247. result.shortNoVal = shortNoVal
  248. result.longNoVal = longNoVal
  249. result.allowWhitespaceAfterColon = allowWhitespaceAfterColon
  250. if cmdline.len != 0:
  251. result.cmds = newSeq[string](cmdline.len)
  252. for i in 0..<cmdline.len:
  253. result.cmds[i] = cmdline[i].string
  254. else:
  255. result.cmds = newSeq[string](os.paramCount())
  256. for i in countup(1, os.paramCount()):
  257. result.cmds[i-1] = os.paramStr(i).string
  258. result.kind = cmdEnd
  259. result.key = TaintedString""
  260. result.val = TaintedString""
  261. proc handleShortOption(p: var OptParser; cmd: string) =
  262. var i = p.pos
  263. p.kind = cmdShortOption
  264. if i < cmd.len:
  265. add(p.key.string, cmd[i])
  266. inc(i)
  267. p.inShortState = true
  268. while i < cmd.len and cmd[i] in {'\t', ' '}:
  269. inc(i)
  270. p.inShortState = false
  271. if i < cmd.len and cmd[i] in {':', '='} or
  272. card(p.shortNoVal) > 0 and p.key.string[0] notin p.shortNoVal:
  273. if i < cmd.len and cmd[i] in {':', '='}:
  274. inc(i)
  275. p.inShortState = false
  276. while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i)
  277. p.val = TaintedString substr(cmd, i)
  278. p.pos = 0
  279. inc p.idx
  280. else:
  281. p.pos = i
  282. if i >= cmd.len:
  283. p.inShortState = false
  284. p.pos = 0
  285. inc p.idx
  286. proc next*(p: var OptParser) {.rtl, extern: "npo$1".} =
  287. ## Parses the next token.
  288. ##
  289. ## ``p.kind`` describes what kind of token has been parsed. ``p.key`` and
  290. ## ``p.val`` are set accordingly.
  291. runnableExamples:
  292. var p = initOptParser("--left -r:2 file.txt")
  293. p.next()
  294. doAssert p.kind == cmdLongOption and p.key == "left"
  295. p.next()
  296. doAssert p.kind == cmdShortOption and p.key == "r" and p.val == "2"
  297. p.next()
  298. doAssert p.kind == cmdArgument and p.key == "file.txt"
  299. p.next()
  300. doAssert p.kind == cmdEnd
  301. if p.idx >= p.cmds.len:
  302. p.kind = cmdEnd
  303. return
  304. var i = p.pos
  305. while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
  306. p.pos = i
  307. setLen(p.key.string, 0)
  308. setLen(p.val.string, 0)
  309. if p.inShortState:
  310. p.inShortState = false
  311. if i >= p.cmds[p.idx].len:
  312. inc(p.idx)
  313. p.pos = 0
  314. if p.idx >= p.cmds.len:
  315. p.kind = cmdEnd
  316. return
  317. else:
  318. handleShortOption(p, p.cmds[p.idx])
  319. return
  320. if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
  321. inc(i)
  322. if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
  323. p.kind = cmdLongOption
  324. inc(i)
  325. i = parseWord(p.cmds[p.idx], i, p.key.string, {' ', '\t', ':', '='})
  326. while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
  327. if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}:
  328. inc(i)
  329. while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
  330. # if we're at the end, use the next command line option:
  331. if i >= p.cmds[p.idx].len and p.idx < p.cmds.len and
  332. p.allowWhitespaceAfterColon:
  333. inc p.idx
  334. i = 0
  335. if p.idx < p.cmds.len:
  336. p.val = TaintedString p.cmds[p.idx].substr(i)
  337. elif len(p.longNoVal) > 0 and p.key.string notin p.longNoVal and p.idx+1 < p.cmds.len:
  338. p.val = TaintedString p.cmds[p.idx+1]
  339. inc p.idx
  340. else:
  341. p.val = TaintedString""
  342. inc p.idx
  343. p.pos = 0
  344. else:
  345. p.pos = i
  346. handleShortOption(p, p.cmds[p.idx])
  347. else:
  348. p.kind = cmdArgument
  349. p.key = TaintedString p.cmds[p.idx]
  350. inc p.idx
  351. p.pos = 0
  352. proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo$1".} =
  353. ## Retrieves the rest of the command line that has not been parsed yet.
  354. ##
  355. ## See also:
  356. ## * `remainingArgs proc<#remainingArgs,OptParser>`_
  357. ##
  358. ## **Examples:**
  359. ##
  360. ## .. code-block::
  361. ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt")
  362. ## while true:
  363. ## p.next()
  364. ## if p.kind == cmdLongOption and p.key == "": # Look for "--"
  365. ## break
  366. ## else: continue
  367. ## doAssert p.cmdLineRest == "foo.txt bar.txt"
  368. result = p.cmds[p.idx .. ^1].quoteShellCommand.TaintedString
  369. proc remainingArgs*(p: OptParser): seq[TaintedString] {.rtl, extern: "npo$1".} =
  370. ## Retrieves a sequence of the arguments that have not been parsed yet.
  371. ##
  372. ## See also:
  373. ## * `cmdLineRest proc<#cmdLineRest,OptParser>`_
  374. ##
  375. ## **Examples:**
  376. ##
  377. ## .. code-block::
  378. ## var p = initOptParser("--left -r:2 -- foo.txt bar.txt")
  379. ## while true:
  380. ## p.next()
  381. ## if p.kind == cmdLongOption and p.key == "": # Look for "--"
  382. ## break
  383. ## else: continue
  384. ## doAssert p.remainingArgs == @["foo.txt", "bar.txt"]
  385. result = @[]
  386. for i in p.idx..<p.cmds.len: result.add TaintedString(p.cmds[i])
  387. iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key,
  388. val: TaintedString] =
  389. ## Convenience iterator for iterating over the given
  390. ## `OptParser<#OptParser>`_.
  391. ##
  392. ## There is no need to check for ``cmdEnd`` while iterating.
  393. ##
  394. ## See also:
  395. ## * `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_
  396. ##
  397. ## **Examples:**
  398. ##
  399. ## .. code-block::
  400. ## # these are placeholders, of course
  401. ## proc writeHelp() = discard
  402. ## proc writeVersion() = discard
  403. ##
  404. ## var filename: string
  405. ## var p = initOptParser("--left --debug:3 -l -r:2")
  406. ##
  407. ## for kind, key, val in p.getopt():
  408. ## case kind
  409. ## of cmdArgument:
  410. ## filename = key
  411. ## of cmdLongOption, cmdShortOption:
  412. ## case key
  413. ## of "help", "h": writeHelp()
  414. ## of "version", "v": writeVersion()
  415. ## of cmdEnd: assert(false) # cannot happen
  416. ## if filename == "":
  417. ## # no filename has been given, so we show the help
  418. ## writeHelp()
  419. p.pos = 0
  420. p.idx = 0
  421. while true:
  422. next(p)
  423. if p.kind == cmdEnd: break
  424. yield (p.kind, p.key, p.val)
  425. when declared(initOptParser):
  426. iterator getopt*(cmdline: seq[TaintedString] = commandLineParams(),
  427. shortNoVal: set[char] = {}, longNoVal: seq[string] = @[]):
  428. tuple[kind: CmdLineKind, key, val: TaintedString] =
  429. ## Convenience iterator for iterating over command line arguments.
  430. ##
  431. ## This creates a new `OptParser<#OptParser>`_. If no command line
  432. ## arguments are provided, the real command line as provided by the
  433. ## ``os`` module is retrieved instead.
  434. ##
  435. ## ``shortNoVal`` and ``longNoVal`` are used to specify which options
  436. ## do not take values. See the `documentation about these
  437. ## parameters<#shortnoval-and-longnoval>`_ for more information on
  438. ## how this affects parsing.
  439. ##
  440. ## There is no need to check for ``cmdEnd`` while iterating.
  441. ##
  442. ## See also:
  443. ## * `initOptParser proc<#initOptParser,seq[TaintedString],set[char],seq[string]>`_
  444. ##
  445. ## **Examples:**
  446. ##
  447. ## .. code-block::
  448. ##
  449. ## # these are placeholders, of course
  450. ## proc writeHelp() = discard
  451. ## proc writeVersion() = discard
  452. ##
  453. ## var filename: string
  454. ## let params = @["--left", "--debug:3", "-l", "-r:2"]
  455. ##
  456. ## for kind, key, val in getopt(params):
  457. ## case kind
  458. ## of cmdArgument:
  459. ## filename = key
  460. ## of cmdLongOption, cmdShortOption:
  461. ## case key
  462. ## of "help", "h": writeHelp()
  463. ## of "version", "v": writeVersion()
  464. ## of cmdEnd: assert(false) # cannot happen
  465. ## if filename == "":
  466. ## # no filename has been written, so we show the help
  467. ## writeHelp()
  468. var p = initOptParser(cmdline, shortNoVal = shortNoVal,
  469. longNoVal = longNoVal)
  470. while true:
  471. next(p)
  472. if p.kind == cmdEnd: break
  473. yield (p.kind, p.key, p.val)
  474. {.pop.}