123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- #!/usr/bin/env python3
- """
Generate an HTML page and PNG charts with the age of packages in NEW
- To be run once a day.
The tool stores persistent status in the RRD database and the pickled queue
- in order to speed up daily runs.
- Directories and database files are created as needed.
- If the RRD directory is deleted the script rebuilds past years from the historical
- logs, and that can take a while, otherwise it just parses fresh data.
- """
- # Copyright 2020-2024 Federico Ceratto <federico@debian.org>
- # License: GPL-2+
- # Tests are in tests/test_graph_new.py
- from datetime import datetime, timedelta
- from glob import glob
- from textwrap import dedent
- from typing import Optional, Iterator
- from os import makedirs
- import pickle
- import os.path
- import sys
- import subprocess
- import rrdtool # debdeps: python3-rrdtool
- import apt_pkg
- from daklib import utils
# One day in seconds: the RRD step, i.e. one data point per day
SECONDS_IN_DAY = 86400
# Verbose output toggle; set by main(verbose_debug=...) and read by debug()
debug_mode = False
def debug(msg: str) -> None:
    """Print *msg* on stdout, but only when debug_mode is enabled."""
    if not debug_mode:
        return
    print(msg)
def write_index(out_dir: str) -> None:
    """Generates index.html page in the output directory"""
    generated_at = datetime.utcnow()
    # Static page referencing the three PNG charts rendered by plot_graph()
    page = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="shortcut icon" href="https://www.debian.org/favicon.ico" />
<title>Debian NEW queue</title>
</head>
<body>
<p>How much time a new package spends in the NEW queue.</p>
<p>Divided in percentiles: 0 (aka age of the oldest package), 50 (aka median) and 99</p>
<img src="new_queue_wait_time_all.png">
<p></p>
<img src="new_queue_wait_time_year.png">
<p></p>
<img src="new_queue_wait_time_month.png">
<p></p>
<p>Generated on {generated_at}</p>
</body>
</html>
"""
    index_path = os.path.join(out_dir, "index.html")
    with open(index_path, "w") as fh:
        fh.write(page)
def init_rrd(rrdfn: str, t: datetime) -> None:
    """Create a fresh RRD database starting at *t*, one sample per day."""
    print(f"RRD file {rrdfn} not found. Initializing it now.")
    # One GAUGE data source per percentile; heartbeat of 2 days (172800 s)
    data_sources = [f"DS:p{pct}:GAUGE:172800:U:U" for pct in (0, 50, 99)]
    rrdtool.create(
        rrdfn,
        "--start",
        t.strftime("%s"),
        "--step",
        str(SECONDS_IN_DAY),  # 1 update per day
        *data_sources,
        # keep 10 years of daily averages
        "RRA:AVERAGE:0.5:1:3650",
    )
def gen_stats(t: datetime, q: dict) -> tuple:
    """Extract percentiles of package ages.

    :param t: reference time (age = t - entry timestamp)
    :param q: maps package name -> datetime the package entered NEW
    :returns: (p0, p50, p99) timedeltas, where p0 is the age of the
        oldest package, p50 the median age and p99 the 99th percentile
        (near the youngest end).
    """
    s = sorted(q.values())  # ascending entry times: s[0] is the oldest
    if not s:
        # Empty queue: report zero wait time instead of crashing on s[0]
        zero = timedelta(0)
        return (zero, zero, zero)
    # BUGFIX: the percentile indices were scrambled (p99 used 0.50,
    # p50 used 0.05, p0 used 0.02), contradicting the chart legend.
    p0 = t - s[0]
    p50 = t - s[int(len(s) * 0.50)]
    p99 = t - s[int(len(s) * 0.99)]
    return (p0, p50, p99)
def plot_graph(
    out_dir: str, fname: str, rrdfn: str, start, end: datetime, title: str
) -> None:
    """Render one PNG chart of the NEW-queue wait-time percentiles."""
    target = os.path.join(out_dir, fname)
    print(f"Writing {target}")
    cmdline = [
        "--start", start.strftime("%s"),
        "--end", end.strftime("%s"),
        "--width", "900",
        "--height", "200",
        "--title", f"Days spent in NEW over {title}",
        "-v", "days",
    ]
    # One DEF per percentile data source stored in the RRD
    for pct in ("p0", "p50", "p99"):
        cmdline.append(f"DEF:{pct}={rrdfn}:{pct}:AVERAGE")
    cmdline += [
        "LINE1:p0#FF8080:0 percentile",
        "LINE2:p50#8F8080:50 percentile",
        "LINE3:p99#8FFF80:99 percentile",
    ]
    rrdtool.graph(target, *cmdline)
def _handle_conf() -> tuple:
    """Load configuration parameters from Cnf

    Reads settings from the command line and/or dak.conf and returns a
    (rrd_dir, outdir, log_dir) tuple of directory paths.  Exits the
    process with status 1 when a mandatory setting is missing.
    """
    Cnf = utils.get_conf()
    # Command line flags are mirrored into the GraphNew::Options subtree
    Arguments = [
        ("h", "help", "GraphNew::Options::Help"),
        ("r", "rrd", "GraphNew::Options::Rrd", "HasArg"),
        ("o", "outdir", "GraphNew::Options::Outdir", "HasArg"),
        ("b", "dak_base_dir", "Dir::Base", "HasArg"),
    ]
    apt_pkg.parse_commandline(Cnf, Arguments, sys.argv)
    try:
        Options = Cnf.subtree("GraphNew::Options")
    except KeyError:
        # Neither command line options nor a config subtree were provided
        msg = dedent(
            """
            The GraphNew::Options subtree is missing from the config file
            Please use --rrd ... --outdir ... (see --help) or add:
            GraphNew
            {
              Options
              {
                Rrd "<RRD directory>";
                Outdir "<HTML output directory>";
              }
            }
            """
        )
        print(msg, file=sys.stderr)
        sys.exit(1)
    if "Help" in Options:
        usage()
    # The RRD directory comes from --rrd or falls back to Dir::Rrd
    if "GraphNew::Options::Rrd" in Cnf:
        rrd_dir = Cnf["GraphNew::Options::Rrd"]
    elif "Dir::Rrd" in Cnf:
        rrd_dir = Cnf["Dir::Rrd"]
    else:
        print("No RRD directory configured.\n", file=sys.stderr)
        sys.exit(1)
    try:
        outdir = Cnf["GraphNew::Options::Outdir"]
    except Exception:
        print("No output directory configured\n", file=sys.stderr)
        sys.exit(1)
    # Dir::Log holds the dak logfiles that extract_queue_events() parses
    log_dir = Cnf.get("Dir::Log")
    assert log_dir, "Dir::Log is missing from dak.conf or empty"
    return rrd_dir, outdir, log_dir
def skip_file(fn: str, last_update) -> bool:
    """Tell whether *fn* was already fully processed in a previous run."""
    if last_update is None:
        return False  # first run: process everything
    basename, ext = os.path.splitext(os.path.split(fn)[1])
    if not ext:
        # The current month's uncompressed log is always reparsed
        return False
    # Compressed logs are named YYYY-MM.xz; parse the month they cover.
    # NOTE(review): a month is skipped as soon as last_update passes its
    # *first* day — assumes daily runs so older months were fully read;
    # confirm for long gaps between runs.
    return datetime.strptime(basename, "%Y-%m") < last_update
def extract_queue_events(
    log_dir: str, last_update: Optional[datetime]
) -> Iterator[tuple]:
    """Extracts NEW queue events from log files

    Yields (package_name, event, event_time) tuples parsed from the
    compressed monthly logs plus the current month's uncompressed log,
    oldest first.  Files fully covered by *last_update* are skipped.
    """
    compressed_glob = os.path.join(log_dir, "20*xz")
    debug(f"Scanning for compressed logfiles using glob '{compressed_glob}'")
    compressed_fns = sorted(glob(compressed_glob))
    fns = compressed_fns[5:]  # 5 oldest logfiles have no events
    current_fn = os.path.join(log_dir, datetime.utcnow().strftime("%Y-%m"))
    debug(f"Also adding uncompressed log {current_fn}")
    fns.append(current_fn)
    print("%d files to process" % len(fns))
    # Log line patterns marking a package entering or leaving NEW
    patterns = (
        "ACCEPT-TO-NEW",
        "|NEW ACCEPT|",
        "|REJECT|",
        "|NEW REJECT|",
        "|Policy Queue ",
    )
    events_per_month_cnt = None
    for fn in fns:
        if skip_file(fn, last_update):
            debug(f"Skipping {fn}")
            continue
        if events_per_month_cnt is not None:
            debug(f"Events in the month: {events_per_month_cnt}")
        events_per_month_cnt = 0
        debug(f"Processing {fn}")
        # FIX: pass an argument list instead of interpolating the filename
        # into a shell command string (no shell, no quoting pitfalls)
        cmd = ["xzgrep", "-h"]
        for pattern in patterns:
            cmd += ["-e", pattern]
        cmd.append(fn)
        data = subprocess.check_output(cmd).decode()
        for line in data.splitlines():
            line = line.rstrip()
            if line.startswith("#"):
                continue  # comment line
            try:
                # <timestamp> <stage> <actor> <event> ...
                ts, _, _, event, remaining = line.split("|", 4)
            except ValueError:
                continue  # not a well-formed event line
            if event.startswith("Policy Queue "):
                # Policy Queue lines carry one extra field before the name
                _, pname = remaining.split("|", 1)
            else:
                pname = remaining
            assert "|" not in pname, repr(line)
            assert len(ts) == 14  # YYYYMMDDHHMMSS
            event_time = datetime.strptime(ts, "%Y%m%d%H%M%S")
            events_per_month_cnt += 1
            yield pname, event, event_time
def process_events(
    events, last_update: Optional[datetime], init_time: Optional[datetime], queue
) -> tuple:
    """Process logged events like ACCEPT-TO-NEW, ACCEPT, REJECT and
    update the RRD database accordingly

    :param events: iterable of (pname, event, event_time) tuples
    :param last_update: timestamp of the last RRD sample, or None
    :param init_time: RRD creation time, or None on the very first run
    :param queue: package name -> entry time dict; mutated in place
    :returns: (rrdtool_updates, init_time) where rrdtool_updates is a
        list of "ts:p0:p50:p99" strings ready for rrdtool.update()
    """
    glitch_cnt = 0
    processed_events_cnt = 0
    previous_day = None
    rrdtool_updates = []
    # Events that make a package leave NEW.
    # FIX: built once here instead of being rebuilt on every iteration.
    exit_events = {
        "NEW ACCEPT",
        "NEW REJECT",
        "Policy Queue ACCEPT",
        "REJECT",
        "Policy Queue REJECT",
    }
    for pname, event, event_time in events:
        if last_update and last_update >= event_time:
            continue  # already recorded in the RRD
        processed_events_cnt += 1
        if init_time is None:
            # Initialise RRD once, one day before the very first event
            i = event_time - timedelta(days=1)
            debug(f"Very first event at: {event_time}")
            init_time = i
            previous_day = event_time.date()
        # Drop the trailing extension (e.g. ".changes") from the name
        pname = pname.rsplit(".", 1)[0]
        # Update queue dict
        if event == "ACCEPT-TO-NEW":
            queue[pname] = event_time
        elif event in exit_events:
            # Approved or rejected, exits NEW
            if pname in queue:
                queue.pop(pname)
            else:
                # Exit event for a package never seen entering NEW
                glitch_cnt += 1
        elif event == "ACCEPT":
            pass  # accepted without passing through NEW: nothing to do
        if event_time.date() != previous_day:
            # On day change run statistics, update RRD and queue.pickle file
            stats = gen_stats(event_time, queue)
            ux = event_time.strftime("%s")  # Unix timestamp (GNU strftime)
            v = f"{ux}:{stats[0].days}:{stats[1].days}:{stats[2].days}"
            rrdtool_updates.append(v)
            previous_day = event_time.date()
    debug(f"glitches count: {glitch_cnt}")
    debug(f"processed events count: {processed_events_cnt}")
    debug(f"queue len: {len(queue)}")
    return rrdtool_updates, init_time
def generate_output(rrdfn: str, out_dir: str, init_time) -> None:
    """Generate png charts and index.html in the output directory"""
    end_time = rrdtool.lastupdate(rrdfn)["date"]
    # (output filename, chart start time, chart title)
    charts = (
        ("new_queue_wait_time_all.png", init_time, "all"),
        ("new_queue_wait_time_year.png", end_time - timedelta(days=365), "one year"),
        ("new_queue_wait_time_month.png", end_time - timedelta(days=30), "one month"),
    )
    for ofn, start_time, title in charts:
        plot_graph(out_dir, ofn, rrdfn, start_time, end_time, title)
    write_index(out_dir)
def main(verbose_debug=False) -> None:
    """Entry point: load configuration, ingest fresh log events, update
    the RRD database and the pickled NEW-queue snapshot, then render the
    PNG charts and index.html.

    :param verbose_debug: enable debug() messages (manual/debugging runs)
    """
    global debug_mode
    debug_mode = verbose_debug
    rrd_dir, out_dir, log_dir = _handle_conf()
    makedirs(rrd_dir, exist_ok=True)
    makedirs(out_dir, exist_ok=True)
    rrdfn = os.path.join(rrd_dir, "graph_new.rrd")
    queue_fn = os.path.join(rrd_dir, "graph_new.pickle")
    init_time: Optional[datetime] = None
    last_update: Optional[datetime] = None
    if os.path.isfile(rrdfn):
        # Resume from a previous run: the RRD provides the time range,
        # the pickle file the packages still sitting in NEW
        init_time = datetime.fromtimestamp(rrdtool.first(rrdfn))
        last_update = rrdtool.lastupdate(rrdfn)["date"]
        with open(queue_fn, "rb") as f:
            queue = pickle.load(f)
    else:
        queue = {}
    events = tuple(extract_queue_events(log_dir, last_update))
    print(f"{len(events)} events to process")
    if events:
        debug(f"First event to process at: {events[0][2]}")
    rrdtool_updates, begin_time = process_events(events, last_update, init_time, queue)
    if rrdtool_updates:
        debug(f"First RRD update: {rrdtool_updates[0]}")
    # Persist the queue snapshot for the next daily run
    with open(queue_fn, "wb") as f:
        pickle.dump(queue, f)
    if init_time is None:
        # Very first run: create the RRD starting just before the first event
        assert begin_time, "This happens only if no events are found at all"
        init_time = begin_time
        init_rrd(rrdfn, begin_time)
    for u in rrdtool_updates:
        rrdtool.update(rrdfn, u)
    generate_output(rrdfn, out_dir, init_time)
def usage(exit_code=0) -> None:
    """Print command line usage and terminate with *exit_code*."""
    # dedent() strips the common margin, so the output is unindented
    help_text = dedent(
        """\
        Usage: dak graph-new
        Graphs the age of packages in the NEW queue.
        -h, --help show this help and exit.
        -r, --rrd=key Directory where rrd files to be updated are stored
        -o, --outdir=key Directory where the output is stored
        """
    )
    print(help_text)
    sys.exit(exit_code)
- # Uncomment this for manual/debugging runs
- # if __name__ == "__main__":
- # main(verbose_debug=True)
|