logs.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. #! /usr/bin/env python3
  2. # (c) 2008 Thomas Viehmann
  3. # Free software licensed under the GPL version 2 or later
  4. import argparse
  5. import bz2
  6. import json
  7. import os
  8. import re
  9. import datetime
  10. import subprocess
  11. import sys
  12. import tempfile
  13. from collections import defaultdict
  14. ITEMS_TO_KEEP = 20
  15. CACHE_FILE = '/srv/ftp-master.debian.org/misc/dinstall_time_cache'
  16. GRAPH_DIR = '/srv/ftp.debian.org/web/stat'
  17. graphs = {
  18. "dinstall": {
  19. "keystolist": [
  20. "pdiff", "packages", "dakcleanup", "changelogs", "mkfilesindices",
  21. "mpfm", "dep11", "release", "ddaccess", "mkchecksums",
  22. ],
  23. "showothers": True},
  24. }
  25. RE_LINE = re.compile(
  26. rb'\A... .. (\d{2}):(\d{2}):(\d{2}) .*: '
  27. rb'########## dinstall (BEGIN|END): ([a-z0-9]+) .*##########')
  28. def parse_log(path: str):
  29. begin = {}
  30. times = {}
  31. opener = bz2.open if path.endswith(".bz2") else open
  32. with opener(path, "rb") as fh:
  33. for line in fh:
  34. m = RE_LINE.match(line)
  35. if not m:
  36. continue
  37. t = 3600 * int(m[1]) + 60 * int(m[2]) + int(m[3])
  38. event = m[4]
  39. task = m[5].decode()
  40. if event == b"BEGIN":
  41. begin[task] = t
  42. elif event == b"END":
  43. t0 = begin.get(task)
  44. if t0 is not None:
  45. times[task] = (t - t0) / 60.0
  46. else:
  47. print(f"W: {task} ended, but didn't start", file=sys.stderr)
  48. return times
  49. parser = argparse.ArgumentParser(description='plot runtime for dinstall tasks')
  50. parser.add_argument('--items-to-keep', type=int, default=ITEMS_TO_KEEP, metavar='N')
  51. parser.add_argument('--cache-file', default=CACHE_FILE, metavar='PATH')
  52. parser.add_argument('--graph-dir', default=GRAPH_DIR, metavar='PATH')
  53. parser.add_argument('log', nargs='*')
  54. options = parser.parse_args()
  55. data = {}
  56. try:
  57. with open(options.cache_file) as fh:
  58. data = json.load(fh)
  59. except (FileNotFoundError, json.JSONDecodeError):
  60. pass
  61. RE_PATH = re.compile(r'dinstall_(\d{4})\.(\d{2})\.(\d{2})-(\d{2}):(\d{2}):(\d{2})\.log(?:\.bz2)?')
  62. for path in options.log:
  63. m = RE_PATH.search(path)
  64. if not m:
  65. raise Exception(f"Unexpected filename '{path}'")
  66. t = str(datetime.datetime(*(int(x) for x in m.groups())))
  67. data[t] = parse_log(path)
  68. datakeys = sorted(data.keys())
  69. datakeys = datakeys[-options.items_to_keep:]
  70. data = dict((k, data[k]) for k in datakeys)
  71. averages = defaultdict(float)
  72. for times in data.values():
  73. for task, t in times.items():
  74. averages[task] += t
  75. for task in averages.keys():
  76. averages[task] /= len(data)
  77. for task, t in sorted(averages.items(), key=lambda xs: xs[1], reverse=True):
  78. print(f"{task}: {t:.2f}")
  79. with open(f"{options.cache_file}.tmp", "x") as fh:
  80. json.dump(data, fh)
  81. os.rename(f"{options.cache_file}.tmp", options.cache_file)
  82. def dump_file(outfn, keystolist, showothers):
  83. showothers = (showothers and 1) or 0
  84. # careful, outfn is NOT ESCAPED
  85. f = tempfile.NamedTemporaryFile("w+t")
  86. print('\t'.join(keystolist + showothers * ['other']), file=f)
  87. for t, times in data.items():
  88. others = sum(dt for task, dt in times.items() if task not in keystolist)
  89. print(t + '\t' + '\t'.join([str(times.get(task, 0)) for task in keystolist] + showothers * [str(others)]), file=f)
  90. f.flush()
  91. script = """
  92. bitmap(file = "%(outfile)s", type="png16m",width=16.9,height=11.8)
  93. d = read.table("%(datafile)s", sep = "\t")
  94. #d[["ts"]] <- as.POSIXct(d[["timestamp"]])
  95. k = setdiff(names(d),c("ts","timestamp"))
  96. #palette(rainbow(max(length(k),2)))
  97. palette(c("midnightblue", "gold", "turquoise", "plum4", "palegreen1", "OrangeRed", "green4", "blue",
  98. "magenta", "darkgoldenrod3", "tomato4", "violetred2","thistle4", "steelblue2", "springgreen4", "salmon","gray"))
  99. #plot(d[["runtime"]],d[["compress"]],type="l",col="blue")
  100. #lines(d[["runtime"]],d[["logremove"]],type="l",col="red")
  101. #legend(as.POSIXct("2008-12-05"),9500,"logremove",col="red",lty=1)
  102. #plot(d[["ts"]],d[["compress"]],type="l",col="blue")
  103. #lines(d[["ts"]],d[["logremove"]],type="l",col="red")
  104. barplot(t(d[,k]), col=palette(), xlab="date",ylab="time/minutes"
  105. )
  106. par(xpd = TRUE)
  107. legend(xinch(-1.2),par("usr")[4]+yinch(1),legend=k,
  108. ncol=3,fill=1:15) #,xjust=1,yjust=1)
  109. text(xinch(10),par("usr")[4]+yinch(.5),"%(title)s", cex=2)
  110. dev.off()
  111. q()
  112. """ % {'datafile': f.name, 'outfile': outfn,
  113. 'title': ((not showothers) * "partial ") + "dinstall times"}
  114. subprocess.run(["R", "--vanilla", "--slave"],
  115. input=script, stdout=subprocess.DEVNULL, text=True, check=True)
  116. for afn, params in graphs.items():
  117. dump_file(os.path.join(options.graph_dir, afn + '.png'), **params)