# nimgrep.nim (9.5 KB)
#
#
# Nim Grep Utility
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
  9. import
  10. os, strutils, parseopt, pegs, re, terminal
  11. const
  12. Version = "1.1"
  13. Usage = "nimgrep - Nim Grep Utility Version " & Version & """
  14. (c) 2012 Andreas Rumpf
  15. Usage:
  16. nimgrep [options] [pattern] [replacement] (file/directory)*
  17. Options:
  18. --find, -f find the pattern (default)
  19. --replace, -r replace the pattern
  20. --peg pattern is a peg
  21. --re pattern is a regular expression (default); extended
  22. syntax for the regular expression is always turned on
  23. --recursive process directories recursively
  24. --confirm confirm each occurrence/replacement; there is a chance
  25. to abort any time without touching the file
  26. --stdin read pattern from stdin (to avoid the shell's confusing
  27. quoting rules)
  28. --word, -w the match should have word boundaries (buggy for pegs!)
  29. --ignoreCase, -i be case insensitive
  30. --ignoreStyle, -y be style insensitive
  31. --ext:EX1|EX2|... only search the files with the given extension(s)
  32. --nocolor output will be given without any colours.
  33. --verbose be verbose: list every processed file
  34. --help, -h shows this help
  35. --version, -v shows the version
  36. """
  37. type
  38. TOption = enum
  39. optFind, optReplace, optPeg, optRegex, optRecursive, optConfirm, optStdin,
  40. optWord, optIgnoreCase, optIgnoreStyle, optVerbose
  41. TOptions = set[TOption]
  42. TConfirmEnum = enum
  43. ceAbort, ceYes, ceAll, ceNo, ceNone
  44. var
  45. filenames: seq[string] = @[]
  46. pattern = ""
  47. replacement = ""
  48. extensions: seq[string] = @[]
  49. options: TOptions = {optRegex}
  50. useWriteStyled = true
  51. proc ask(msg: string): string =
  52. stdout.write(msg)
  53. stdout.flushFile()
  54. result = stdin.readLine()
  55. proc confirm: TConfirmEnum =
  56. while true:
  57. case normalize(ask(" [a]bort; [y]es, a[l]l, [n]o, non[e]: "))
  58. of "a", "abort": return ceAbort
  59. of "y", "yes": return ceYes
  60. of "l", "all": return ceAll
  61. of "n", "no": return ceNo
  62. of "e", "none": return ceNone
  63. else: discard
  64. proc countLines(s: string, first, last: int): int =
  65. var i = first
  66. while i <= last:
  67. if s[i] == '\13':
  68. inc result
  69. if i < last and s[i+1] == '\10': inc(i)
  70. elif s[i] == '\10':
  71. inc result
  72. inc i
  73. proc beforePattern(s: string, first: int): int =
  74. result = first-1
  75. while result >= 0:
  76. if s[result] in NewLines: break
  77. dec(result)
  78. inc(result)
  79. proc afterPattern(s: string, last: int): int =
  80. result = last+1
  81. while result < s.len:
  82. if s[result] in NewLines: break
  83. inc(result)
  84. dec(result)
  85. proc writeColored(s: string) =
  86. if useWriteStyled:
  87. terminal.writeStyled(s, {styleUnderscore, styleBright})
  88. else:
  89. stdout.write(s)
  90. proc highlight(s, match, repl: string, t: tuple[first, last: int],
  91. line: int, showRepl: bool) =
  92. const alignment = 6
  93. stdout.write(line.`$`.align(alignment), ": ")
  94. var x = beforePattern(s, t.first)
  95. var y = afterPattern(s, t.last)
  96. for i in x .. t.first-1: stdout.write(s[i])
  97. writeColored(match)
  98. for i in t.last+1 .. y: stdout.write(s[i])
  99. stdout.write("\n")
  100. stdout.flushFile()
  101. if showRepl:
  102. stdout.write(spaces(alignment-1), "-> ")
  103. for i in x .. t.first-1: stdout.write(s[i])
  104. writeColored(repl)
  105. for i in t.last+1 .. y: stdout.write(s[i])
  106. stdout.write("\n")
  107. stdout.flushFile()
  108. proc processFile(filename: string) =
  109. var filenameShown = false
  110. template beforeHighlight =
  111. if not filenameShown and optVerbose notin options:
  112. stdout.writeLine(filename)
  113. stdout.flushFile()
  114. filenameShown = true
  115. var buffer: string
  116. try:
  117. buffer = system.readFile(filename)
  118. except IOError:
  119. echo "cannot open file: ", filename
  120. return
  121. if optVerbose in options:
  122. stdout.writeLine(filename)
  123. stdout.flushFile()
  124. var pegp: Peg
  125. var rep: Regex
  126. var result: string
  127. if optRegex in options:
  128. if {optIgnoreCase, optIgnoreStyle} * options != {}:
  129. rep = re(pattern, {reExtended, reIgnoreCase})
  130. else:
  131. rep = re(pattern)
  132. else:
  133. pegp = peg(pattern)
  134. if optReplace in options:
  135. result = newStringOfCap(buffer.len)
  136. var line = 1
  137. var i = 0
  138. var matches: array[0..re.MaxSubpatterns-1, string]
  139. for j in 0..high(matches): matches[j] = ""
  140. var reallyReplace = true
  141. while i < buffer.len:
  142. var t: tuple[first, last: int]
  143. if optRegex notin options:
  144. t = findBounds(buffer, pegp, matches, i)
  145. else:
  146. t = findBounds(buffer, rep, matches, i)
  147. if t.first < 0: break
  148. inc(line, countLines(buffer, i, t.first-1))
  149. var wholeMatch = buffer.substr(t.first, t.last)
  150. beforeHighlight()
  151. if optReplace notin options:
  152. highlight(buffer, wholeMatch, "", t, line, showRepl=false)
  153. else:
  154. var r: string
  155. if optRegex notin options:
  156. r = replace(wholeMatch, pegp, replacement % matches)
  157. else:
  158. r = replace(wholeMatch, rep, replacement % matches)
  159. if optConfirm in options:
  160. highlight(buffer, wholeMatch, r, t, line, showRepl=true)
  161. case confirm()
  162. of ceAbort: quit(0)
  163. of ceYes: reallyReplace = true
  164. of ceAll:
  165. reallyReplace = true
  166. options.excl(optConfirm)
  167. of ceNo:
  168. reallyReplace = false
  169. of ceNone:
  170. reallyReplace = false
  171. options.excl(optConfirm)
  172. else:
  173. highlight(buffer, wholeMatch, r, t, line, showRepl=reallyReplace)
  174. if reallyReplace:
  175. result.add(buffer.substr(i, t.first-1))
  176. result.add(r)
  177. else:
  178. result.add(buffer.substr(i, t.last))
  179. inc(line, countLines(buffer, t.first, t.last))
  180. i = t.last+1
  181. if optReplace in options:
  182. result.add(substr(buffer, i))
  183. var f: File
  184. if open(f, filename, fmWrite):
  185. f.write(result)
  186. f.close()
  187. else:
  188. quit "cannot open file for overwriting: " & filename
  189. proc hasRightExt(filename: string, exts: seq[string]): bool =
  190. var y = splitFile(filename).ext.substr(1) # skip leading '.'
  191. for x in items(exts):
  192. if os.cmpPaths(x, y) == 0: return true
  193. proc styleInsensitive(s: string): string =
  194. template addx =
  195. result.add(s[i])
  196. inc(i)
  197. result = ""
  198. var i = 0
  199. var brackets = 0
  200. while i < s.len:
  201. case s[i]
  202. of 'A'..'Z', 'a'..'z', '0'..'9':
  203. addx()
  204. if brackets == 0: result.add("_?")
  205. of '_':
  206. addx()
  207. result.add('?')
  208. of '[':
  209. addx()
  210. inc(brackets)
  211. of ']':
  212. addx()
  213. if brackets > 0: dec(brackets)
  214. of '?':
  215. addx()
  216. if s[i] == '<':
  217. addx()
  218. while s[i] != '>' and s[i] != '\0': addx()
  219. of '\\':
  220. addx()
  221. if s[i] in strutils.Digits:
  222. while s[i] in strutils.Digits: addx()
  223. else:
  224. addx()
  225. else: addx()
  226. proc walker(dir: string) =
  227. for kind, path in walkDir(dir):
  228. case kind
  229. of pcFile:
  230. if extensions.len == 0 or path.hasRightExt(extensions):
  231. processFile(path)
  232. of pcDir:
  233. if optRecursive in options:
  234. walker(path)
  235. else: discard
  236. if existsFile(dir): processFile(dir)
  237. proc writeHelp() =
  238. stdout.write(Usage)
  239. stdout.flushFile()
  240. quit(0)
  241. proc writeVersion() =
  242. stdout.write(Version & "\n")
  243. stdout.flushFile()
  244. quit(0)
  245. proc checkOptions(subset: TOptions, a, b: string) =
  246. if subset <= options:
  247. quit("cannot specify both '$#' and '$#'" % [a, b])
  248. for kind, key, val in getopt():
  249. case kind
  250. of cmdArgument:
  251. if options.contains(optStdin):
  252. filenames.add(key)
  253. elif pattern.len == 0:
  254. pattern = key
  255. elif options.contains(optReplace) and replacement.len == 0:
  256. replacement = key
  257. else:
  258. filenames.add(key)
  259. of cmdLongoption, cmdShortOption:
  260. case normalize(key)
  261. of "find", "f": incl(options, optFind)
  262. of "replace", "r": incl(options, optReplace)
  263. of "peg":
  264. excl(options, optRegex)
  265. incl(options, optPeg)
  266. of "re":
  267. incl(options, optRegex)
  268. excl(options, optPeg)
  269. of "recursive": incl(options, optRecursive)
  270. of "confirm": incl(options, optConfirm)
  271. of "stdin": incl(options, optStdin)
  272. of "word", "w": incl(options, optWord)
  273. of "ignorecase", "i": incl(options, optIgnoreCase)
  274. of "ignorestyle", "y": incl(options, optIgnoreStyle)
  275. of "ext": extensions.add val.split('|')
  276. of "nocolor": useWriteStyled = false
  277. of "verbose": incl(options, optVerbose)
  278. of "help", "h": writeHelp()
  279. of "version", "v": writeVersion()
  280. else: writeHelp()
  281. of cmdEnd: assert(false) # cannot happen
  282. when defined(posix):
  283. useWriteStyled = terminal.isatty(stdout)
  284. checkOptions({optFind, optReplace}, "find", "replace")
  285. checkOptions({optPeg, optRegex}, "peg", "re")
  286. checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style")
  287. if optStdin in options:
  288. pattern = ask("pattern [ENTER to exit]: ")
  289. if isNil(pattern) or pattern.len == 0: quit(0)
  290. if optReplace in options:
  291. replacement = ask("replacement [supports $1, $# notations]: ")
  292. if pattern.len == 0:
  293. writeHelp()
  294. else:
  295. if filenames.len == 0:
  296. filenames.add(os.getCurrentDir())
  297. if optRegex notin options:
  298. if optWord in options:
  299. pattern = r"(^ / !\letter)(" & pattern & r") !\letter"
  300. if optIgnoreStyle in options:
  301. pattern = "\\y " & pattern
  302. elif optIgnoreCase in options:
  303. pattern = "\\i " & pattern
  304. else:
  305. if optIgnoreStyle in options:
  306. pattern = styleInsensitive(pattern)
  307. if optWord in options:
  308. pattern = r"\b (:?" & pattern & r") \b"
  309. for f in items(filenames):
  310. walker(f)