queue_rss.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. #! /usr/bin/env python3
  2. # Generate two rss feeds for a directory with .changes file
  3. # License: GPL v2 or later
  4. # Author: Filippo Giunchedi <filippo@debian.org>
  5. # Version: 0.5
  6. import html
  7. import os
  8. import os.path
  9. import pickle
  10. import re
  11. import sys
  12. import time
  13. from optparse import OptionParser
  14. from datetime import datetime
  15. from email.utils import parseaddr
  16. import PyRSS2Gen
  17. from debian.deb822 import Changes
  18. inrss_filename = "NEW_in.rss"
  19. outrss_filename = "NEW_out.rss"
  20. db_filename = "status.db"
  21. parser = OptionParser()
  22. parser.set_defaults(queuedir="queue", outdir="out", datadir="status",
  23. logdir="log", max_entries="30")
  24. parser.add_option("-q", "--queuedir", dest="queuedir",
  25. help="The queue dir (%default)")
  26. parser.add_option("-o", "--outdir", dest="outdir",
  27. help="The output directory (%default)")
  28. parser.add_option("-d", "--datadir", dest="datadir",
  29. help="The data dir (%default)")
  30. parser.add_option("-l", "--logdir", dest="logdir",
  31. help="The ACCEPT/REJECT dak log dir (%default)")
  32. parser.add_option("-m", "--max-entries", dest="max_entries", type="int",
  33. help="Max number of entries to keep (%default)")
  34. class Status:
  35. def __init__(self):
  36. self.feed_in = PyRSS2Gen.RSS2(
  37. title="Packages entering NEW",
  38. link="https://ftp-master.debian.org/new.html",
  39. description="Debian packages entering the NEW queue")
  40. self.feed_out = PyRSS2Gen.RSS2(
  41. title="Packages leaving NEW",
  42. link="https://ftp-master.debian.org/new.html",
  43. description="Debian packages leaving the NEW queue")
  44. self.queue = {}
  45. def purge_old_items(feed, max):
  46. """ Purge RSSItem from feed, no more than max. """
  47. if feed.items is None or len(feed.items) == 0:
  48. return False
  49. feed.items = feed.items[:max]
  50. return True
  51. def parse_changes(fname):
  52. """ Parse a .changes file named fname.
  53. Return {fname: parsed} """
  54. m = Changes(open(fname))
  55. wanted_fields = set(['Source', 'Version', 'Architecture', 'Distribution',
  56. 'Date', 'Changed-By', 'Description', 'Changes'])
  57. if not set(m.keys()).issuperset(wanted_fields):
  58. return None
  59. return {os.path.basename(fname): m}
  60. def parse_queuedir(dir):
  61. """ Parse dir for .changes files.
  62. Return a dictionary {filename: parsed_file}"""
  63. if not os.path.exists(dir):
  64. return None
  65. res = {}
  66. for fname in os.listdir(dir):
  67. if not fname.endswith(".changes"):
  68. continue
  69. parsed = parse_changes(os.path.join(dir, fname))
  70. if parsed:
  71. res.update(parsed)
  72. return res
  73. def parse_leave_reason(fname):
  74. """ Parse a dak log file fname for ACCEPT/REJECT reason from process-new.
  75. Return a dictionary {filename: reason}"""
  76. reason_re = re.compile(r".+\|process-new\|(.+)\|NEW (ACCEPT|REJECT)\|(\S+)")
  77. try:
  78. f = open(fname)
  79. except IOError as e:
  80. print("Can't open %s: %s" % (fname, e), file=sys.stderr)
  81. return {}
  82. res = {}
  83. for l in f.readlines():
  84. m = reason_re.search(l)
  85. if m:
  86. res[m.group(3)] = (m.group(2), m.group(1))
  87. f.close()
  88. return res
  89. def add_rss_item(status, msg, direction):
  90. if direction == "in":
  91. feed = status.feed_in
  92. title = "%s %s entered NEW" % (msg['Source'], msg['Version'])
  93. pubdate = msg['Date']
  94. elif direction == "out":
  95. feed = status.feed_out
  96. if 'Leave-Reason' in msg:
  97. title = "%s %s left NEW (%s)" % (msg['Source'], msg['Version'],
  98. msg['Leave-Reason'][0])
  99. else:
  100. title = "%s %s left NEW" % (msg['Source'], msg['Version'])
  101. pubdate = datetime.utcnow()
  102. else:
  103. return False
  104. description = "<pre>Description: %s\nChanges: %s\n</pre>" % \
  105. (html.escape(msg['Description']),
  106. html.escape(msg['Changes']))
  107. link = "https://ftp-master.debian.org/new/%s_%s.html" % \
  108. (msg['Source'], msg['Version'])
  109. guid = msg['Checksums-Sha256'][0]['sha256']
  110. if 'Processed-By' in msg:
  111. author = msg['Processed-By']
  112. else:
  113. changedby = parseaddr(msg['Changed-By'])
  114. author = "%s (%s)" % (changedby[1], changedby[0])
  115. feed.items.insert(0,
  116. PyRSS2Gen.RSSItem(
  117. title,
  118. pubDate=pubdate,
  119. description=description,
  120. author=html.escape(author),
  121. link=link,
  122. guid=guid
  123. )
  124. )
  125. def update_feeds(curqueue, status, settings):
  126. # inrss -> append all items in curqueue not in status.queue
  127. # outrss -> append all items in status.queue not in curqueue
  128. leave_reason = None
  129. # logfile from dak's process-new
  130. reason_log = os.path.join(settings.logdir, time.strftime("%Y-%m"))
  131. for (name, parsed) in curqueue.items():
  132. if name not in status.queue:
  133. # new package
  134. add_rss_item(status, parsed, "in")
  135. for (name, parsed) in status.queue.items():
  136. if name not in curqueue:
  137. # removed package, try to find out why
  138. if leave_reason is None:
  139. leave_reason = parse_leave_reason(reason_log)
  140. if leave_reason and name in leave_reason:
  141. parsed['Leave-Reason'] = leave_reason[name][0]
  142. parsed['Processed-By'] = leave_reason[name][1] + "@debian.org"
  143. add_rss_item(status, parsed, "out")
  144. if __name__ == "__main__":
  145. (settings, args) = parser.parse_args()
  146. if not os.path.exists(settings.outdir):
  147. print("Outdir '%s' does not exists" % settings.outdir, file=sys.stderr)
  148. parser.print_help()
  149. sys.exit(1)
  150. if not os.path.exists(settings.datadir):
  151. print("Datadir '%s' does not exists" % settings.datadir, file=sys.stderr)
  152. parser.print_help()
  153. sys.exit(1)
  154. status_db = os.path.join(settings.datadir, db_filename)
  155. try:
  156. with open(status_db, 'rb') as fh:
  157. try:
  158. status = pickle.load(fh, encoding="utf-8")
  159. except UnicodeDecodeError:
  160. fh.seek(0)
  161. status = pickle.load(fh, encoding="latin-1")
  162. except IOError:
  163. status = Status()
  164. current_queue = parse_queuedir(settings.queuedir)
  165. update_feeds(current_queue, status, settings)
  166. purge_old_items(status.feed_in, settings.max_entries)
  167. purge_old_items(status.feed_out, settings.max_entries)
  168. feed_in_file = os.path.join(settings.outdir, inrss_filename)
  169. feed_out_file = os.path.join(settings.outdir, outrss_filename)
  170. try:
  171. status.feed_in.write_xml(open(feed_in_file, "w+"), "utf-8")
  172. status.feed_out.write_xml(open(feed_out_file, "w+"), "utf-8")
  173. except IOError as why:
  174. print("Unable to write feeds:", why, file=sys.stderr)
  175. sys.exit(1)
  176. status.queue = current_queue
  177. try:
  178. with open(status_db, 'wb+') as fh:
  179. pickle.dump(status, fh)
  180. except IOError as why:
  181. print("Unable to save status:", why, file=sys.stderr)
  182. sys.exit(1)
  183. # vim:et:ts=4