nimgrep.nim 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. #
  2. #
  3. # Nim Grep Utility
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. import
  10. os, strutils, parseopt, pegs, re, terminal
  const
    # Tool version; shown in the banner and printed by --version.
    Version = "1.2"

    # Help text printed by `writeHelp`.
    # The blank line after the opening quotes is intentional: Nim drops the
    # first newline of a triple-quoted literal, so without it the copyright
    # line would be glued to the version banner.
    # The --re entry no longer claims that extended syntax is always enabled:
    # the regex is compiled with `{reStudy}` only, so whitespace in a pattern
    # is significant.
    Usage = "nimgrep - Nim Grep Utility Version " & Version & """

  (c) 2012 Andreas Rumpf
Usage:
  nimgrep [options] [pattern] [replacement] (file/directory)*
Options:
  --find, -f          find the pattern (default)
  --replace, -r       replace the pattern
  --peg               pattern is a peg
  --re                pattern is a regular expression (default)
  --recursive         process directories recursively
  --confirm           confirm each occurrence/replacement; there is a chance
                      to abort any time without touching the file
  --stdin             read pattern from stdin (to avoid the shell's confusing
                      quoting rules)
  --word, -w          the match should have word boundaries (buggy for pegs!)
  --ignoreCase, -i    be case insensitive
  --ignoreStyle, -y   be style insensitive
  --ext:EX1|EX2|...   only search the files with the given extension(s)
  --nocolor           output will be given without any colours.
  --oneline           show file on each matched line
  --verbose           be verbose: list every processed file
  --filenames         find the pattern in the filenames, not in the contents
                      of the file
  --help, -h          shows this help
  --version, -v       shows the version
"""
  type
    # Boolean command line switches; the active ones are kept in `options`.
    TOption = enum
      optFind,          # --find / -f
      optReplace,       # --replace / -r
      optPeg,           # --peg
      optRegex,         # --re
      optRecursive,     # --recursive
      optConfirm,       # --confirm
      optStdin,         # --stdin
      optWord,          # --word / -w
      optIgnoreCase,    # --ignoreCase / -i
      optIgnoreStyle,   # --ignoreStyle / -y
      optVerbose,       # --verbose
      optFilenames      # --filenames

    TOptions = set[TOption]

    # Possible answers to the interactive replacement prompt.
    TConfirmEnum = enum
      ceAbort,
      ceYes,
      ceAll,
      ceNo,
      ceNone

    # Either kind of compiled pattern; lets the search procs be written once
    # for both regular expressions and pegs.
    Pattern = Regex | Peg

  # Parameters named `pattern` that carry no explicit type are `Pattern`.
  using pattern: Pattern
  var
    # Files and directories collected from the command line.
    filenames = newSeq[string]()
    # Raw pattern text as entered by the user (command line or stdin).
    pattern = ""
    # Replacement text; only used together with optReplace.
    replacement = ""
    # Extensions given via --ext; an empty list means "no filtering".
    extensions = newSeq[string]()
    # Active switches; regular expressions are the default pattern kind.
    options: TOptions = {optRegex}
    # Whether matches are emphasised through terminal styling.
    useWriteStyled = true
    # Whether each reported line is prefixed with "file:line: ".
    oneline = false
  proc ask(msg: string): string =
    ## Writes `msg` to stdout as a prompt (no newline is appended), flushes it
    ## so it is visible immediately, and returns the next line read from stdin.
    write(stdout, msg)
    flushFile(stdout)
    readLine(stdin)
  proc confirm: TConfirmEnum =
    ## Keeps prompting until the user gives one of the recognised answers
    ## (single letter or full word, compared after `normalize`) and returns
    ## the corresponding `TConfirmEnum` value.
    const prompt = " [a]bort; [y]es, a[l]l, [n]o, non[e]: "
    while true:
      let answer = normalize(ask(prompt))
      case answer
      of "a", "abort":
        return ceAbort
      of "y", "yes":
        return ceYes
      of "l", "all":
        return ceAll
      of "n", "no":
        return ceNo
      of "e", "none":
        return ceNone
      else:
        # Unrecognised input: ask again.
        discard
  proc countLines(s: string, first, last: int): int =
    ## Counts the line terminators found in `s[first..last]`.
    ## CR, LF and CR LF each count as one terminator; a CR LF pair is only
    ## recognised as a pair when the LF also lies inside the range.
    var pos = first
    while pos <= last:
      case s[pos]
      of '\13':
        inc result
        # Swallow the LF of a CR LF pair so it is not counted twice.
        if pos < last and s[pos+1] == '\10':
          inc pos
      of '\10':
        inc result
      else:
        discard
      inc pos
  proc beforePattern(s: string, first: int): int =
    ## Returns the index of the first character of the line that contains
    ## position `first`, i.e. the position right after the closest preceding
    ## newline character, or 0 when there is none.
    result = first
    while result > 0 and s[result-1] notin NewLines:
      dec result
  proc afterPattern(s: string, last: int): int =
    ## Returns the index of the last character of the line that contains
    ## position `last`, i.e. the position right before the next newline
    ## character, or the last index of `s` when there is none.
    result = last
    while result+1 < s.len and s[result+1] notin NewLines:
      inc result
  proc writeColored(s: string) =
    ## Writes `s` to stdout: underlined and bright when styled output is
    ## enabled (`useWriteStyled`), verbatim otherwise.
    if not useWriteStyled:
      stdout.write(s)
      return
    terminal.writeStyled(s, {styleUnderscore, styleBright})
  proc highlight(s, match, repl: string, t: tuple[first, last: int],
                 filename:string, line: int, showRepl: bool) =
    ## Prints the line of `s` surrounding the match bounds `t`, with `match`
    ## emphasised via `writeColored`. The line is prefixed with
    ## "filename:line: " in one-line mode, otherwise with the right-aligned
    ## line number. When `showRepl` is true a second "-> " row shows the same
    ## line with `repl` in place of the match.
    const alignment = 6
    if oneline:
      stdout.write(filename, ":", line, ": ")
    else:
      stdout.write(align($line, alignment), ": ")

    let
      lineStart = beforePattern(s, t.first)
      lineEnd = afterPattern(s, t.last)
      leading = s.substr(lineStart, t.first-1)   # text before the match
      trailing = s.substr(t.last+1, lineEnd)     # text after the match

    template emitLine(marked: string) =
      # One output row: context, emphasised part, context, newline.
      stdout.write(leading)
      writeColored(marked)
      stdout.write(trailing)
      stdout.write("\n")
      stdout.flushFile()

    emitLine(match)
    if showRepl:
      stdout.write(spaces(alignment-1), "-> ")
      emitLine(repl)
  proc processFile(pattern; filename: string; counter: var int) =
    ## Searches one file for `pattern` and reports every match through
    ## `highlight`, incrementing `counter` per match.
    ##
    ## * With `optFilenames` the file name itself is searched instead of the
    ##   file contents.
    ## * With `optReplace` each match is replaced (optionally after asking via
    ##   `confirm`) and the result is written back to `filename`. The file is
    ##   only rewritten when its contents actually changed, so files without
    ##   any replacement keep their timestamp and read-only files without a
    ##   match no longer abort the run.
    ##
    ## An unreadable file is reported and skipped; failing to open the file
    ## for writing terminates the program.
    var filenameShown = false
    template beforeHighlight =
      # Print the file name once, ahead of the first match of this file
      # (verbose mode already printed it, one-line mode prefixes every line).
      if not filenameShown and optVerbose notin options and not oneline:
        stdout.writeLine(filename)
        stdout.flushFile()
        filenameShown = true

    var buffer: string
    if optFilenames in options:
      buffer = filename
    else:
      try:
        buffer = system.readFile(filename)
      except IOError:
        echo "cannot open file: ", filename
        return
    if optVerbose in options:
      stdout.writeLine(filename)
      stdout.flushFile()

    # Text that will replace the file contents; only built in replace mode.
    var output: string
    if optReplace in options:
      output = newStringOfCap(buffer.len)

    var line = 1
    var i = 0
    var matches: array[0..re.MaxSubpatterns-1, string]
    for j in 0..high(matches): matches[j] = ""
    var reallyReplace = true
    while i < buffer.len:
      let t = findBounds(buffer, pattern, matches, i)
      # No further match, or an empty match that would never advance `i`.
      if t.first < 0 or t.last < t.first: break
      inc(line, countLines(buffer, i, t.first-1))
      let wholeMatch = buffer.substr(t.first, t.last)
      beforeHighlight()
      inc counter
      if optReplace notin options:
        highlight(buffer, wholeMatch, "", t, filename, line, showRepl=false)
      else:
        let r = replace(wholeMatch, pattern, replacement % matches)
        if optConfirm in options:
          highlight(buffer, wholeMatch, r, t, filename, line, showRepl=true)
          case confirm()
          of ceAbort: quit(0)
          of ceYes: reallyReplace = true
          of ceAll:
            # Replace this and every later match without asking again.
            reallyReplace = true
            options.excl(optConfirm)
          of ceNo:
            reallyReplace = false
          of ceNone:
            # Keep this and every later match without asking again.
            reallyReplace = false
            options.excl(optConfirm)
        else:
          highlight(buffer, wholeMatch, r, t, filename, line,
                    showRepl=reallyReplace)
        if reallyReplace:
          output.add(buffer.substr(i, t.first-1))
          output.add(r)
        else:
          output.add(buffer.substr(i, t.last))
      inc(line, countLines(buffer, t.first, t.last))
      i = t.last+1

    if optReplace in options:
      output.add(substr(buffer, i))
      # Leave the file alone when nothing changed.
      if output != buffer:
        var f: File
        if open(f, filename, fmWrite):
          try:
            f.write(output)
          finally:
            f.close()
        else:
          quit "cannot open file for overwriting: " & filename
  proc hasRightExt(filename: string, exts: seq[string]): bool =
    ## Returns true when the extension of `filename` (without its leading dot)
    ## equals one of `exts` according to `os.cmpPaths`.
    let parts = splitFile(filename)
    let bareExt = parts.ext.substr(1)  # drop the leading '.'
    result = false
    for candidate in exts:
      if os.cmpPaths(candidate, bareExt) == 0:
        return true
  proc styleInsensitive(s: string): string =
    ## Rewrites the regular expression `s` so that underscores become
    ## optional: outside of `[...]` classes every letter or digit is followed
    ## by `_?` and every `_` by `?`.
    ##
    ## Copied through unchanged: the contents of `[...]` classes, the name in
    ## a `?<name` construct, and backslash escapes (`\` plus either a run of
    ## digits or one character).
    ##
    ## The end of the pattern is checked explicitly, so a trailing `?`, an
    ## unterminated `?<...` or a trailing `\` no longer indexes past the end
    ## of `s`. An underscore inside a class is no longer followed by `?`,
    ## which would have added a literal `?` to the class.
    var i = 0
    var brackets = 0
    # Character at the current position, or '\0' once `s` is exhausted.
    template peek(): char =
      (if i < s.len: s[i] else: '\0')
    template addx =
      result.add(s[i])
      inc(i)
    result = ""
    while i < s.len:
      case s[i]
      of 'A'..'Z', 'a'..'z', '0'..'9':
        addx()
        if brackets == 0: result.add("_?")
      of '_':
        addx()
        if brackets == 0: result.add('?')
      of '[':
        addx()
        inc(brackets)
      of ']':
        addx()
        if brackets > 0: dec(brackets)
      of '?':
        addx()
        if peek() == '<':
          # Copy the group name verbatim; the closing '>' is handled by the
          # main loop.
          addx()
          while peek() != '>' and peek() != '\0': addx()
      of '\\':
        addx()
        if peek() in strutils.Digits:
          while peek() in strutils.Digits: addx()
        elif i < s.len:
          addx()
      else: addx()
  proc walker(pattern; dir: string; counter: var int) =
    ## Applies `processFile` to everything denoted by `dir`.
    ##
    ## Directory entries that are plain files are processed when no extension
    ## filter is set or `hasRightExt` accepts them; sub-directories are only
    ## descended into with `optRecursive`; other entry kinds are skipped.
    ## When `dir` is itself a file, that file is processed directly without
    ## applying the extension filter.
    for kind, path in walkDir(dir):
      case kind
      of pcFile:
        if extensions.len == 0 or path.hasRightExt(extensions):
          processFile(pattern, path, counter)
      of pcDir:
        if optRecursive in options:
          walker(pattern, path, counter)
      else: discard
    # `fileExists` replaces the deprecated `existsFile` spelling.
    if fileExists(dir): processFile(pattern, dir, counter)
  proc writeHelp() =
    ## Prints the usage text and terminates the program with exit code 0.
    write(stdout, Usage)
    flushFile(stdout)
    quit(QuitSuccess)
  proc writeVersion() =
    ## Prints the version number on its own line and terminates the program
    ## with exit code 0.
    write(stdout, Version, "\n")
    flushFile(stdout)
    quit(QuitSuccess)
  proc checkOptions(subset: TOptions, a, b: string) =
    ## Terminates the program with an error message naming `a` and `b` when
    ## every option in `subset` is currently active.
    let allActive = subset * options == subset
    if allActive:
      quit("cannot specify both '$#' and '$#'" % [a, b])
  # ---- program entry: parse the command line, then run the search ----

  when defined(posix):
    # Style the output only when stdout is a terminal. This has to happen
    # before the options are parsed so that --nocolor can still switch it off.
    useWriteStyled = terminal.isatty(stdout)

  for kind, key, val in getopt():
    case kind
    of cmdArgument:
      # Positional arguments are, in order: pattern, replacement (replace
      # mode only), then files/directories. With --stdin the pattern is read
      # interactively, so every argument is a file name.
      if options.contains(optStdin):
        filenames.add(key)
      elif pattern.len == 0:
        pattern = key
      elif options.contains(optReplace) and replacement.len == 0:
        replacement = key
      else:
        filenames.add(key)
    of cmdLongOption, cmdShortOption:
      case normalize(key)
      of "find", "f": incl(options, optFind)
      of "replace", "r": incl(options, optReplace)
      of "peg":
        excl(options, optRegex)
        incl(options, optPeg)
      of "re":
        incl(options, optRegex)
        excl(options, optPeg)
      of "recursive": incl(options, optRecursive)
      of "confirm": incl(options, optConfirm)
      of "stdin": incl(options, optStdin)
      of "word", "w": incl(options, optWord)
      of "ignorecase", "i": incl(options, optIgnoreCase)
      of "ignorestyle", "y": incl(options, optIgnoreStyle)
      of "ext": extensions.add val.split('|')
      of "nocolor": useWriteStyled = false
      of "oneline": oneline = true
      of "verbose": incl(options, optVerbose)
      of "filenames": incl(options, optFilenames)
      of "help", "h": writeHelp()
      of "version", "v": writeVersion()
      else: writeHelp()
    of cmdEnd: assert(false) # cannot happen

  # Reject contradictory switch combinations.
  checkOptions({optFind, optReplace}, "find", "replace")
  checkOptions({optPeg, optRegex}, "peg", "re")
  checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style")
  checkOptions({optFilenames, optReplace}, "filenames", "replace")

  if optStdin in options:
    pattern = ask("pattern [ENTER to exit]: ")
    if pattern.len == 0: quit(0)
    if optReplace in options:
      replacement = ask("replacement [supports $1, $# notations]: ")

  if pattern.len == 0:
    writeHelp()
  else:
    var counter = 0
    if filenames.len == 0:
      # Nothing given: search the current directory.
      filenames.add(os.getCurrentDir())
    if optRegex notin options:
      if optWord in options:
        pattern = r"(^ / !\letter)(" & pattern & r") !\letter"
      if optIgnoreStyle in options:
        pattern = "\\y " & pattern
      elif optIgnoreCase in options:
        pattern = "\\i " & pattern
      let pegp = peg(pattern)
      for f in items(filenames):
        walker(pegp, f, counter)
    else:
      var reflags = {reStudy}
      if optIgnoreStyle in options:
        pattern = styleInsensitive(pattern)
      if optWord in options:
        # Non-capturing group, so the user's own $1, $2, ... keep their
        # numbers. No spaces: the regex is not compiled in extended mode,
        # so they would have to match literally.
        pattern = r"\b(?:" & pattern & r")\b"
      if {optIgnoreCase, optIgnoreStyle} * options != {}:
        reflags.incl reIgnoreCase
      let rep = re(pattern, reflags)
      for f in items(filenames):
        walker(rep, f, counter)
    if not oneline:
      stdout.write($counter & " matches\n")