# gnatsparse.py
  1. try:
  2. # Using Psyco makes it about 25% faster, but there's a bug in psyco in
  3. # handling of eval causing it to use unlimited memory with the magic
  4. # file enabled.
  5. # import psyco
  6. # psyco.full()
  7. # from psyco.classes import *
  8. pass
  9. except:
  10. pass
  11. import re
  12. import base64
  13. import cStringIO
  14. import specialuu
  15. import array
  16. import email.Utils
  17. import zlib
  18. import magic
  19. # Comment out if you don't want magic detection
  20. magicf = magic.MagicFile()
  21. # Open our output file
  22. outfile = open("gnats2bz_data.sql", "w")
  23. # List of GNATS fields
  24. fieldnames = ("Number", "Category", "Synopsis", "Confidential", "Severity",
  25. "Priority", "Responsible", "State", "Quarter", "Keywords",
  26. "Date-Required", "Class", "Submitter-Id", "Arrival-Date",
  27. "Closed-Date", "Last-Modified", "Originator", "Release",
  28. "Organization", "Environment", "Description", "How-To-Repeat",
  29. "Fix", "Release-Note", "Audit-Trail", "Unformatted")
  30. # Dictionary telling us which GNATS fields are multiline
  31. multilinefields = {"Organization":1, "Environment":1, "Description":1,
  32. "How-To-Repeat":1, "Fix":1, "Release-Note":1,
  33. "Audit-Trail":1, "Unformatted":1}
  34. # Mapping of GCC release to version. Our version string is updated every
  35. # so we need to funnel all release's with 3.4 in the string to be version
  36. # 3.4 for bug tracking purposes
  37. # The key is a regex to match, the value is the version it corresponds
  38. # with
  39. releasetovermap = {r"3\.4":"3.4", r"3\.3":"3.3", r"3\.2\.2":"3.2.2",
  40. r"3\.2\.1":"3.2.1", r"3\.2":"3.2", r"3\.1\.2":"3.1.2",
  41. r"3\.1\.1":"3.1.1", r"3\.1":"3.1", r"3\.0\.4":"3.0.4",
  42. r"3\.0\.3":"3.0.3", r"3\.0\.2":"3.0.2", r"3\.0\.1":"3.0.1",
  43. r"3\.0":"3.0", r"2\.95\.4":"2.95.4", r"2\.95\.3":"2.95.3",
  44. r"2\.95\.2":"2.95.2", r"2\.95\.1":"2.95.1",
  45. r"2\.95":"2.95", r"2\.97":"2.97",
  46. r"2\.96.*[rR][eE][dD].*[hH][aA][tT]":"2.96 (redhat)",
  47. r"2\.96":"2.96"}
  48. # These map the field name to the field id bugzilla assigns. We need
  49. # the id when doing bug activity.
  50. fieldids = {"State":8, "Responsible":15}
  51. # These are the keywords we use in gcc bug tracking. They are transformed
  52. # into bugzilla keywords. The format here is <keyword>-><bugzilla keyword id>
  53. keywordids = {"wrong-code":1, "ice-on-legal-code":2, "ice-on-illegal-code":3,
  54. "rejects-legal":4, "accepts-illegal":5, "pessimizes-code":6}
  55. # Map from GNATS states to Bugzilla states. Duplicates and reopened bugs
  56. # are handled when parsing the audit trail, so no need for them here.
  57. state_lookup = {"":"NEW", "open":"ASSIGNED", "analyzed":"ASSIGNED",
  58. "feedback":"WAITING", "closed":"CLOSED",
  59. "suspended":"SUSPENDED"}
  60. # Table of versions that exist in the bugs, built up as we go along
  61. versions_table = {}
  62. # Delimiter gnatsweb uses for attachments
  63. attachment_delimiter = "----gnatsweb-attachment----\n"
  64. # Here starts the various regular expressions we use
  65. # Matches an entire GNATS single line field
  66. gnatfieldre = re.compile(r"""^([>\w\-]+)\s*:\s*(.*)\s*$""")
  67. # Matches the name of a GNATS field
  68. fieldnamere = re.compile(r"""^>(.*)$""")
  69. # Matches the useless part of an envelope
  70. uselessre = re.compile(r"""^(\S*?):\s*""", re.MULTILINE)
  71. # Matches the filename in a content disposition
  72. dispositionre = re.compile("(\\S+);\\s*filename=\"([^\"]+)\"")
  73. # Matches the last changed date in the entire text of a bug
  74. # If you have other editable fields that get audit trail entries, modify this
  75. # The field names are explicitly listed in order to speed up matching
  76. lastdatere = re.compile(r"""^(?:(?:State|Responsible|Priority|Severity)-Changed-When: )(.+?)$""", re.MULTILINE)
  77. # Matches the From line of an email or the first line of an audit trail entry
  78. # We use this re to find the begin lines of all the audit trail entries
  79. # The field names are explicitly listed in order to speed up matching
  80. fromtore=re.compile(r"""^(?:(?:State|Responsible|Priority|Severity)-Changed-From-To: |From: )""", re.MULTILINE)
  81. # These re's match the various parts of an audit trail entry
  82. changedfromtore=re.compile(r"""^(\w+?)-Changed-From-To: (.+?)$""", re.MULTILINE)
  83. changedbyre=re.compile(r"""^\w+?-Changed-By: (.+?)$""", re.MULTILINE)
  84. changedwhenre=re.compile(r"""^\w+?-Changed-When: (.+?)$""", re.MULTILINE)
  85. changedwhyre=re.compile(r"""^\w+?-Changed-Why:\s*(.*?)$""", re.MULTILINE)
  86. # This re matches audit trail text saying that the current bug is a duplicate of another
  87. duplicatere=re.compile(r"""(?:")?Dup(?:licate)?(?:d)?(?:")? of .*?(\d+)""", re.IGNORECASE | re.MULTILINE)
  88. # Get the text of a From: line
  89. fromre=re.compile(r"""^From: (.*?)$""", re.MULTILINE)
  90. # Get the text of a Date: Line
  91. datere=re.compile(r"""^Date: (.*?)$""", re.MULTILINE)
  92. # Map of the responsible file to email addresses
  93. responsible_map = {}
  94. # List of records in the responsible file
  95. responsible_list = []
  96. # List of records in the categories file
  97. categories_list = []
  98. # List of pr's in the index
  99. pr_list = []
  100. # Map usernames to user ids
  101. usermapping = {}
  102. # Start with this user id
  103. userid_base = 2
  104. # Name of gnats user
  105. gnats_username = "gnats@gcc.gnu.org"
  106. # Name of unassigned user
  107. unassigned_username = "unassigned@gcc.gnu.org"
  108. gnats_db_dir = "."
  109. product = "gcc"
  110. productdesc = "GNU Compiler Connection"
  111. milestoneurl = "http://gcc/gnu.org"
  112. defaultmilestone = "3.4"
  113. def write_non_bug_tables():
  114. """ Write out the non-bug related tables, such as products, profiles, etc."""
  115. # Set all non-unconfirmed bugs's everconfirmed flag
  116. print >>outfile, "update bugs set everconfirmed=1 where bug_status != 'UNCONFIRMED';"
  117. # Set all bugs assigned to the unassigned user to NEW
  118. print >>outfile, "update bugs set bug_status='NEW',assigned_to='NULL' where bug_status='ASSIGNED' AND assigned_to=3;"
  119. # Insert the products
  120. print >>outfile, "\ninsert into products ("
  121. print >>outfile, " product, description, milestoneurl, disallownew,"
  122. print >>outfile, " defaultmilestone, votestoconfirm) values ("
  123. print >>outfile, " '%s', '%s', '%s', 0, '%s', 1);" % (product,
  124. productdesc,
  125. milestoneurl,
  126. defaultmilestone)
  127. # Insert the components
  128. for category in categories_list:
  129. component = SqlQuote(category[0])
  130. productstr = SqlQuote(product)
  131. description = SqlQuote(category[1])
  132. initialowner = SqlQuote("3")
  133. print >>outfile, "\ninsert into components (";
  134. print >>outfile, " value, program, initialowner, initialqacontact,"
  135. print >>outfile, " description) values ("
  136. print >>outfile, " %s, %s, %s, '', %s);" % (component, productstr,
  137. initialowner, description)
  138. # Insert the versions
  139. for productstr, version_list in versions_table.items():
  140. productstr = SqlQuote(productstr)
  141. for version in version_list:
  142. version = SqlQuote(version)
  143. print >>outfile, "\ninsert into versions (value, program) "
  144. print >>outfile, " values (%s, %s);" % (version, productstr)
  145. # Insert the users
  146. for username, userid in usermapping.items():
  147. realname = map_username_to_realname(username)
  148. username = SqlQuote(username)
  149. realname = SqlQuote(realname)
  150. print >>outfile, "\ninsert into profiles ("
  151. print >>outfile, " userid, login_name, password, cryptpassword, realname, groupset"
  152. print >>outfile, ") values ("
  153. print >>outfile, "%s,%s,'password',encrypt('password'), %s, 0);" % (userid, username, realname)
  154. print >>outfile, "update profiles set groupset=1 << 32 where login_name like '%\@gcc.gnu.org';"
  155. def unixdate2datetime(unixdate):
  156. """ Convert a unix date to a datetime value """
  157. year, month, day, hour, min, sec, x, x, x, x = email.Utils.parsedate_tz(unixdate)
  158. return "%d-%02d-%02d %02d:%02d:%02d" % (year,month,day,hour,min,sec)
  159. def unixdate2timestamp(unixdate):
  160. """ Convert a unix date to a timestamp value """
  161. year, month, day, hour, min, sec, x, x, x, x = email.Utils.parsedate_tz(unixdate)
  162. return "%d%02d%02d%02d%02d%02d" % (year,month,day,hour,min,sec)
  163. def SqlQuote(str):
  164. """ Perform SQL quoting on a string """
  165. return "'%s'" % str.replace("'", """''""").replace("\\", "\\\\").replace("\0","\\0")
  166. def convert_gccver_to_ver(gccver):
  167. """ Given a gcc version, convert it to a Bugzilla version. """
  168. for k in releasetovermap.keys():
  169. if re.search(".*%s.*" % k, gccver) is not None:
  170. return releasetovermap[k]
  171. result = re.search(r""".*(\d\.\d) \d+ \(experimental\).*""", gccver)
  172. if result is not None:
  173. return result.group(1)
  174. return "unknown"
  175. def load_index(fname):
  176. """ Load in the GNATS index file """
  177. global pr_list
  178. ifp = open(fname)
  179. for record in ifp.xreadlines():
  180. fields = record.split("|")
  181. pr_list.append(fields[0])
  182. ifp.close()
  183. def load_categories(fname):
  184. """ Load in the GNATS categories file """
  185. global categories_list
  186. cfp = open(fname)
  187. for record in cfp.xreadlines():
  188. if re.search("^#", record) is not None:
  189. continue
  190. categories_list.append(record.split(":"))
  191. cfp.close()
  192. def map_username_to_realname(username):
  193. """ Given a username, find the real name """
  194. name = username
  195. name = re.sub("@.*", "", name)
  196. for responsible_record in responsible_list:
  197. if responsible_record[0] == name:
  198. return responsible_record[1]
  199. if len(responsible_record) > 2:
  200. if responsible_record[2] == username:
  201. return responsible_record[1]
  202. return ""
  203. def get_userid(responsible):
  204. """ Given an email address, get the user id """
  205. global responsible_map
  206. global usermapping
  207. global userid_base
  208. if responsible is None:
  209. return -1
  210. responsible = responsible.lower()
  211. responsible = re.sub("sources.redhat.com", "gcc.gnu.org", responsible)
  212. if responsible_map.has_key(responsible):
  213. responsible = responsible_map[responsible]
  214. if usermapping.has_key(responsible):
  215. return usermapping[responsible]
  216. else:
  217. usermapping[responsible] = userid_base
  218. userid_base += 1
  219. return usermapping[responsible]
  220. def load_responsible(fname):
  221. """ Load in the GNATS responsible file """
  222. global responsible_map
  223. global responsible_list
  224. rfp = open(fname)
  225. for record in rfp.xreadlines():
  226. if re.search("^#", record) is not None:
  227. continue
  228. split_record = record.split(":")
  229. responsible_map[split_record[0]] = split_record[2].rstrip()
  230. responsible_list.append(record.split(":"))
  231. rfp.close()
  232. def split_csl(list):
  233. """ Split a comma separated list """
  234. newlist = re.split(r"""\s*,\s*""", list)
  235. return newlist
  236. def fix_email_addrs(addrs):
  237. """ Perform various fixups and cleaning on an e-mail address """
  238. addrs = split_csl(addrs)
  239. trimmed_addrs = []
  240. for addr in addrs:
  241. addr = re.sub(r"""\(.*\)""","",addr)
  242. addr = re.sub(r""".*<(.*)>.*""","\\1",addr)
  243. addr = addr.rstrip()
  244. addr = addr.lstrip()
  245. trimmed_addrs.append(addr)
  246. addrs = ", ".join(trimmed_addrs)
  247. return addrs
  248. class Bugzillabug(object):
  249. """ Class representing a bugzilla bug """
  250. def __init__(self, gbug):
  251. """ Initialize a bugzilla bug from a GNATS bug. """
  252. self.bug_id = gbug.bug_id
  253. self.long_descs = []
  254. self.bug_ccs = [get_userid("gcc-bugs@gcc.gnu.org")]
  255. self.bug_activity = []
  256. self.attachments = gbug.attachments
  257. self.gnatsfields = gbug.fields
  258. self.need_unformatted = gbug.has_unformatted_attach == 0
  259. self.need_unformatted &= gbug.fields.has_key("Unformatted")
  260. self.translate_pr()
  261. self.update_versions()
  262. if self.fields.has_key("Audit-Trail"):
  263. self.parse_audit_trail()
  264. self.write_bug()
  265. def parse_fromto(type, string):
  266. """ Parses the from and to parts of a changed-from-to line """
  267. fromstr = ""
  268. tostr = ""
  269. # Some slightly messed up changed lines have unassigned-new,
  270. # instead of unassigned->new. So we make the > optional.
  271. result = re.search(r"""(.*)-(?:>?)(.*)""", string)
  272. # Only know how to handle parsing of State and Responsible
  273. # changed-from-to right now
  274. if type == "State":
  275. fromstr = state_lookup[result.group(1)]
  276. tostr = state_lookup[result.group(2)]
  277. elif type == "Responsible":
  278. if result.group(1) != "":
  279. fromstr = result.group(1)
  280. if result.group(2) != "":
  281. tostr = result.group(2)
  282. if responsible_map.has_key(fromstr):
  283. fromstr = responsible_map[fromstr]
  284. if responsible_map.has_key(tostr):
  285. tostr = responsible_map[tostr]
  286. return (fromstr, tostr)
  287. parse_fromto = staticmethod(parse_fromto)
  288. def parse_audit_trail(self):
  289. """ Parse a GNATS audit trail """
  290. trail = self.fields["Audit-Trail"]
  291. # Begin to split the audit trail into pieces
  292. result = fromtore.finditer(trail)
  293. starts = []
  294. ends = []
  295. pieces = []
  296. # Make a list of the pieces
  297. for x in result:
  298. pieces.append (x)
  299. # Find the start and end of each piece
  300. if len(pieces) > 0:
  301. for x in xrange(len(pieces)-1):
  302. starts.append(pieces[x].start())
  303. ends.append(pieces[x+1].start())
  304. starts.append(pieces[-1].start())
  305. ends.append(len(trail))
  306. pieces = []
  307. # Now make the list of actual text of the pieces
  308. for x in xrange(len(starts)):
  309. pieces.append(trail[starts[x]:ends[x]])
  310. # And parse the actual pieces
  311. for piece in pieces:
  312. result = changedfromtore.search(piece)
  313. # See what things we actually have inside this entry, and
  314. # handle them appropriately
  315. if result is not None:
  316. type = result.group(1)
  317. changedfromto = result.group(2)
  318. # If the bug was reopened, mark it as such
  319. if changedfromto.find("closed->analyzed") != -1:
  320. if self.fields["bug_status"] == "'NEW'":
  321. self.fields["bug_status"] = "'REOPENED'"
  322. if type == "State" or type == "Responsible":
  323. oldstate, newstate = self.parse_fromto (type, changedfromto)
  324. result = changedbyre.search(piece)
  325. if result is not None:
  326. changedby = result.group(1)
  327. result = changedwhenre.search(piece)
  328. if result is not None:
  329. changedwhen = result.group(1)
  330. changedwhen = unixdate2datetime(changedwhen)
  331. changedwhen = SqlQuote(changedwhen)
  332. result = changedwhyre.search(piece)
  333. changedwhy = piece[result.start(1):]
  334. #changedwhy = changedwhy.lstrip()
  335. changedwhy = changedwhy.rstrip()
  336. changedby = get_userid(changedby)
  337. # Put us on the cc list if we aren't there already
  338. if changedby != self.fields["userid"] \
  339. and changedby not in self.bug_ccs:
  340. self.bug_ccs.append(changedby)
  341. # If it's a duplicate, mark it as such
  342. result = duplicatere.search(changedwhy)
  343. if result is not None:
  344. newtext = "*** This bug has been marked as a duplicate of %s ***" % result.group(1)
  345. newtext = SqlQuote(newtext)
  346. self.long_descs.append((self.bug_id, changedby,
  347. changedwhen, newtext))
  348. self.fields["bug_status"] = "'RESOLVED'"
  349. self.fields["resolution"] = "'DUPLICATE'"
  350. self.fields["userid"] = changedby
  351. else:
  352. newtext = "%s-Changed-From-To: %s\n%s-Changed-Why: %s\n" % (type, changedfromto, type, changedwhy)
  353. newtext = SqlQuote(newtext)
  354. self.long_descs.append((self.bug_id, changedby,
  355. changedwhen, newtext))
  356. if type == "State" or type == "Responsible":
  357. newstate = SqlQuote("%s" % newstate)
  358. oldstate = SqlQuote("%s" % oldstate)
  359. fieldid = fieldids[type]
  360. self.bug_activity.append((newstate, oldstate, fieldid, changedby, changedwhen))
  361. else:
  362. # It's an email
  363. result = fromre.search(piece)
  364. if result is None:
  365. continue
  366. fromstr = result.group(1)
  367. fromstr = fix_email_addrs(fromstr)
  368. fromstr = get_userid(fromstr)
  369. result = datere.search(piece)
  370. if result is None:
  371. continue
  372. datestr = result.group(1)
  373. datestr = SqlQuote(unixdate2timestamp(datestr))
  374. if fromstr != self.fields["userid"] \
  375. and fromstr not in self.bug_ccs:
  376. self.bug_ccs.append(fromstr)
  377. self.long_descs.append((self.bug_id, fromstr, datestr,
  378. SqlQuote(piece)))
  379. def write_bug(self):
  380. """ Output a bug to the data file """
  381. fields = self.fields
  382. print >>outfile, "\ninsert into bugs("
  383. print >>outfile, " bug_id, assigned_to, bug_severity, priority, bug_status, creation_ts, delta_ts,"
  384. print >>outfile, " short_desc,"
  385. print >>outfile, " reporter, version,"
  386. print >>outfile, " product, component, resolution, target_milestone, qa_contact,"
  387. print >>outfile, " gccbuild, gcctarget, gcchost, keywords"
  388. print >>outfile, " ) values ("
  389. print >>outfile, "%s, %s, %s, %s, %s, %s, %s," % (self.bug_id, fields["userid"], fields["bug_severity"], fields["priority"], fields["bug_status"], fields["creation_ts"], fields["delta_ts"])
  390. print >>outfile, "%s," % (fields["short_desc"])
  391. print >>outfile, "%s, %s," % (fields["reporter"], fields["version"])
  392. print >>outfile, "%s, %s, %s, %s, 0," %(fields["product"], fields["component"], fields["resolution"], fields["target_milestone"])
  393. print >>outfile, "%s, %s, %s, %s" % (fields["gccbuild"], fields["gcctarget"], fields["gcchost"], fields["keywords"])
  394. print >>outfile, ");"
  395. if self.fields["keywords"] != 0:
  396. print >>outfile, "\ninsert into keywords (bug_id, keywordid) values ("
  397. print >>outfile, " %s, %s);" % (self.bug_id, fields["keywordid"])
  398. for id, who, when, text in self.long_descs:
  399. print >>outfile, "\ninsert into longdescs ("
  400. print >>outfile, " bug_id, who, bug_when, thetext) values("
  401. print >>outfile, " %s, %s, %s, %s);" % (id, who, when, text)
  402. for name, data, who in self.attachments:
  403. print >>outfile, "\ninsert into attachments ("
  404. print >>outfile, " bug_id, filename, description, mimetype, ispatch, submitter_id) values ("
  405. ftype = None
  406. # It's *magic*!
  407. if name.endswith(".ii") == 1:
  408. ftype = "text/x-c++"
  409. elif name.endswith(".i") == 1:
  410. ftype = "text/x-c"
  411. else:
  412. ftype = magicf.detect(cStringIO.StringIO(data))
  413. if ftype is None:
  414. ftype = "application/octet-stream"
  415. print >>outfile, "%s,%s,%s, %s,0, %s,%s);" %(self.bug_id, SqlQuote(name), SqlQuote(name), SqlQuote (ftype), who)
  416. print >>outfile, "\ninsert into attach_data ("
  417. print >>outfile, "\n(id, thedata) values (last_insert_id(),"
  418. print >>outfile, "%s);" % (SqlQuote(zlib.compress(data)))
  419. for newstate, oldstate, fieldid, changedby, changedwhen in self.bug_activity:
  420. print >>outfile, "\ninsert into bugs_activity ("
  421. print >>outfile, " bug_id, who, bug_when, fieldid, added, removed) values ("
  422. print >>outfile, " %s, %s, %s, %s, %s, %s);" % (self.bug_id,
  423. changedby,
  424. changedwhen,
  425. fieldid,
  426. newstate,
  427. oldstate)
  428. for cc in self.bug_ccs:
  429. print >>outfile, "\ninsert into cc(bug_id, who) values (%s, %s);" %(self.bug_id, cc)
  430. def update_versions(self):
  431. """ Update the versions table to account for the version on this bug """
  432. global versions_table
  433. if self.fields.has_key("Release") == 0 \
  434. or self.fields.has_key("Category") == 0:
  435. return
  436. curr_product = "gcc"
  437. curr_version = self.fields["Release"]
  438. if curr_version == "":
  439. return
  440. curr_version = convert_gccver_to_ver (curr_version)
  441. if versions_table.has_key(curr_product) == 0:
  442. versions_table[curr_product] = []
  443. for version in versions_table[curr_product]:
  444. if version == curr_version:
  445. return
  446. versions_table[curr_product].append(curr_version)
  447. def translate_pr(self):
  448. """ Transform a GNATS PR into a Bugzilla bug """
  449. self.fields = self.gnatsfields
  450. if (self.fields.has_key("Organization") == 0) \
  451. or self.fields["Organization"].find("GCC"):
  452. self.fields["Originator"] = ""
  453. self.fields["Organization"] = ""
  454. self.fields["Organization"].lstrip()
  455. if (self.fields.has_key("Release") == 0) \
  456. or self.fields["Release"] == "" \
  457. or self.fields["Release"].find("unknown-1.0") != -1:
  458. self.fields["Release"]="unknown"
  459. if self.fields.has_key("Responsible"):
  460. result = re.search(r"""\w+""", self.fields["Responsible"])
  461. self.fields["Responsible"] = "%s%s" % (result.group(0), "@gcc.gnu.org")
  462. self.fields["gcchost"] = ""
  463. self.fields["gcctarget"] = ""
  464. self.fields["gccbuild"] = ""
  465. if self.fields.has_key("Environment"):
  466. result = re.search("^host: (.+?)$", self.fields["Environment"],
  467. re.MULTILINE)
  468. if result is not None:
  469. self.fields["gcchost"] = result.group(1)
  470. result = re.search("^target: (.+?)$", self.fields["Environment"],
  471. re.MULTILINE)
  472. if result is not None:
  473. self.fields["gcctarget"] = result.group(1)
  474. result = re.search("^build: (.+?)$", self.fields["Environment"],
  475. re.MULTILINE)
  476. if result is not None:
  477. self.fields["gccbuild"] = result.group(1)
  478. self.fields["userid"] = get_userid(self.fields["Responsible"])
  479. self.fields["bug_severity"] = "normal"
  480. if self.fields["Class"] == "change-request":
  481. self.fields["bug_severity"] = "enhancement"
  482. elif self.fields.has_key("Severity"):
  483. if self.fields["Severity"] == "critical":
  484. self.fields["bug_severity"] = "critical"
  485. elif self.fields["Severity"] == "serious":
  486. self.fields["bug_severity"] = "major"
  487. elif self.fields.has_key("Synopsis"):
  488. if re.search("crash|assert", self.fields["Synopsis"]):
  489. self.fields["bug_severity"] = "critical"
  490. elif re.search("wrong|error", self.fields["Synopsis"]):
  491. self.fields["bug_severity"] = "major"
  492. self.fields["bug_severity"] = SqlQuote(self.fields["bug_severity"])
  493. self.fields["keywords"] = 0
  494. if keywordids.has_key(self.fields["Class"]):
  495. self.fields["keywords"] = self.fields["Class"]
  496. self.fields["keywordid"] = keywordids[self.fields["Class"]]
  497. self.fields["keywords"] = SqlQuote(self.fields["keywords"])
  498. self.fields["priority"] = "P1"
  499. if self.fields.has_key("Severity") and self.fields.has_key("Priority"):
  500. severity = self.fields["Severity"]
  501. priority = self.fields["Priority"]
  502. if severity == "critical":
  503. if priority == "high":
  504. self.fields["priority"] = "P1"
  505. else:
  506. self.fields["priority"] = "P2"
  507. elif severity == "serious":
  508. if priority == "low":
  509. self.fields["priority"] = "P4"
  510. else:
  511. self.fields["priority"] = "P3"
  512. else:
  513. if priority == "high":
  514. self.fields["priority"] = "P4"
  515. else:
  516. self.fields["priority"] = "P5"
  517. self.fields["priority"] = SqlQuote(self.fields["priority"])
  518. state = self.fields["State"]
  519. if (state == "open" or state == "analyzed") and self.fields["userid"] != 3:
  520. self.fields["bug_status"] = "ASSIGNED"
  521. self.fields["resolution"] = ""
  522. elif state == "feedback":
  523. self.fields["bug_status"] = "WAITING"
  524. self.fields["resolution"] = ""
  525. elif state == "closed":
  526. self.fields["bug_status"] = "CLOSED"
  527. if self.fields.has_key("Class"):
  528. theclass = self.fields["Class"]
  529. if theclass.find("duplicate") != -1:
  530. self.fields["resolution"]="DUPLICATE"
  531. elif theclass.find("mistaken") != -1:
  532. self.fields["resolution"]="INVALID"
  533. else:
  534. self.fields["resolution"]="FIXED"
  535. else:
  536. self.fields["resolution"]="FIXED"
  537. elif state == "suspended":
  538. self.fields["bug_status"] = "SUSPENDED"
  539. self.fields["resolution"] = ""
  540. elif state == "analyzed" and self.fields["userid"] == 3:
  541. self.fields["bug_status"] = "NEW"
  542. self.fields["resolution"] = ""
  543. else:
  544. self.fields["bug_status"] = "UNCONFIRMED"
  545. self.fields["resolution"] = ""
  546. self.fields["bug_status"] = SqlQuote(self.fields["bug_status"])
  547. self.fields["resolution"] = SqlQuote(self.fields["resolution"])
  548. self.fields["creation_ts"] = ""
  549. if self.fields.has_key("Arrival-Date") and self.fields["Arrival-Date"] != "":
  550. self.fields["creation_ts"] = unixdate2datetime(self.fields["Arrival-Date"])
  551. self.fields["creation_ts"] = SqlQuote(self.fields["creation_ts"])
  552. self.fields["delta_ts"] = ""
  553. if self.fields.has_key("Audit-Trail"):
  554. result = lastdatere.findall(self.fields["Audit-Trail"])
  555. result.reverse()
  556. if len(result) > 0:
  557. self.fields["delta_ts"] = unixdate2timestamp(result[0])
  558. if self.fields["delta_ts"] == "":
  559. if self.fields.has_key("Arrival-Date") and self.fields["Arrival-Date"] != "":
  560. self.fields["delta_ts"] = unixdate2timestamp(self.fields["Arrival-Date"])
  561. self.fields["delta_ts"] = SqlQuote(self.fields["delta_ts"])
  562. self.fields["short_desc"] = SqlQuote(self.fields["Synopsis"])
  563. if self.fields.has_key("Reply-To") and self.fields["Reply-To"] != "":
  564. self.fields["reporter"] = get_userid(self.fields["Reply-To"])
  565. elif self.fields.has_key("Mail-Header"):
  566. result = re.search(r"""From .*?([\w.]+@[\w.]+)""", self.fields["Mail-Header"])
  567. if result:
  568. self.fields["reporter"] = get_userid(result.group(1))
  569. else:
  570. self.fields["reporter"] = get_userid(gnats_username)
  571. else:
  572. self.fields["reporter"] = get_userid(gnats_username)
  573. long_desc = self.fields["Description"]
  574. long_desc2 = ""
  575. for field in ["Release", "Environment", "How-To-Repeat"]:
  576. if self.fields.has_key(field) and self.fields[field] != "":
  577. long_desc += ("\n\n%s:\n" % field) + self.fields[field]
  578. if self.fields.has_key("Fix") and self.fields["Fix"] != "":
  579. long_desc2 = "Fix:\n" + self.fields["Fix"]
  580. if self.need_unformatted == 1 and self.fields["Unformatted"] != "":
  581. long_desc += "\n\nUnformatted:\n" + self.fields["Unformatted"]
  582. if long_desc != "":
  583. self.long_descs.append((self.bug_id, self.fields["reporter"],
  584. self.fields["creation_ts"],
  585. SqlQuote(long_desc)))
  586. if long_desc2 != "":
  587. self.long_descs.append((self.bug_id, self.fields["reporter"],
  588. self.fields["creation_ts"],
  589. SqlQuote(long_desc2)))
  590. for field in ["gcchost", "gccbuild", "gcctarget"]:
  591. self.fields[field] = SqlQuote(self.fields[field])
  592. self.fields["version"] = ""
  593. if self.fields["Release"] != "":
  594. self.fields["version"] = convert_gccver_to_ver (self.fields["Release"])
  595. self.fields["version"] = SqlQuote(self.fields["version"])
  596. self.fields["product"] = SqlQuote("gcc")
  597. self.fields["component"] = "invalid"
  598. if self.fields.has_key("Category"):
  599. self.fields["component"] = self.fields["Category"]
  600. self.fields["component"] = SqlQuote(self.fields["component"])
  601. self.fields["target_milestone"] = "---"
  602. if self.fields["version"].find("3.4") != -1:
  603. self.fields["target_milestone"] = "3.4"
  604. self.fields["target_milestone"] = SqlQuote(self.fields["target_milestone"])
  605. if self.fields["userid"] == 2:
  606. self.fields["userid"] = "\'NULL\'"
  607. class GNATSbug(object):
  608. """ Represents a single GNATS PR """
  609. def __init__(self, filename):
  610. self.attachments = []
  611. self.has_unformatted_attach = 0
  612. fp = open (filename)
  613. self.fields = self.parse_pr(fp.xreadlines())
  614. self.bug_id = int(self.fields["Number"])
  615. if self.fields.has_key("Unformatted"):
  616. self.find_gnatsweb_attachments()
  617. if self.fields.has_key("How-To-Repeat"):
  618. self.find_regular_attachments("How-To-Repeat")
  619. if self.fields.has_key("Fix"):
  620. self.find_regular_attachments("Fix")
  621. def get_attacher(fields):
  622. if fields.has_key("Reply-To") and fields["Reply-To"] != "":
  623. return get_userid(fields["Reply-To"])
  624. else:
  625. result = None
  626. if fields.has_key("Mail-Header"):
  627. result = re.search(r"""From .*?([\w.]+\@[\w.]+)""",
  628. fields["Mail-Header"])
  629. if result is not None:
  630. reporter = get_userid(result.group(1))
  631. else:
  632. reporter = get_userid(gnats_username)
  633. get_attacher = staticmethod(get_attacher)
  634. def find_regular_attachments(self, which):
  635. fields = self.fields
  636. while re.search("^begin [0-7]{3}", fields[which],
  637. re.DOTALL | re.MULTILINE):
  638. outfp = cStringIO.StringIO()
  639. infp = cStringIO.StringIO(fields[which])
  640. filename, start, end = specialuu.decode(infp, outfp, quiet=0)
  641. fields[which]=fields[which].replace(fields[which][start:end],
  642. "See attachments for %s\n" % filename)
  643. self.attachments.append((filename, outfp.getvalue(),
  644. self.get_attacher(fields)))
  645. def decode_gnatsweb_attachment(self, attachment):
  646. result = re.split(r"""\n\n""", attachment, 1)
  647. if len(result) == 1:
  648. return -1
  649. envelope, body = result
  650. envelope = uselessre.split(envelope)
  651. envelope.pop(0)
  652. # Turn the list of key, value into a dict of key => value
  653. attachinfo = dict([(envelope[i], envelope[i+1]) for i in xrange(0,len(envelope),2)])
  654. for x in attachinfo.keys():
  655. attachinfo[x] = attachinfo[x].rstrip()
  656. if (attachinfo.has_key("Content-Type") == 0) or \
  657. (attachinfo.has_key("Content-Disposition") == 0):
  658. raise ValueError, "Unable to parse file attachment"
  659. result = dispositionre.search(attachinfo["Content-Disposition"])
  660. filename = result.group(2)
  661. filename = re.sub(".*/","", filename)
  662. filename = re.sub(".*\\\\","", filename)
  663. attachinfo["filename"]=filename
  664. result = re.search("""(\S+);.*""", attachinfo["Content-Type"])
  665. if result is not None:
  666. attachinfo["Content-Type"] = result.group(1)
  667. if attachinfo.has_key("Content-Transfer-Encoding"):
  668. if attachinfo["Content-Transfer-Encoding"] == "base64":
  669. attachinfo["data"] = base64.decodestring(body)
  670. else:
  671. attachinfo["data"]=body
  672. return (attachinfo["filename"], attachinfo["data"],
  673. self.get_attacher(self.fields))
  674. def find_gnatsweb_attachments(self):
  675. fields = self.fields
  676. attachments = re.split(attachment_delimiter, fields["Unformatted"])
  677. fields["Unformatted"] = attachments.pop(0)
  678. for attachment in attachments:
  679. result = self.decode_gnatsweb_attachment (attachment)
  680. if result != -1:
  681. self.attachments.append(result)
  682. self.has_unformatted_attach = 1
  683. def parse_pr(lines):
  684. #fields = {"envelope":[]}
  685. fields = {"envelope":array.array("c")}
  686. hdrmulti = "envelope"
  687. for line in lines:
  688. line = line.rstrip('\n')
  689. line += '\n'
  690. result = gnatfieldre.search(line)
  691. if result is None:
  692. if hdrmulti != "":
  693. if fields.has_key(hdrmulti):
  694. #fields[hdrmulti].append(line)
  695. fields[hdrmulti].fromstring(line)
  696. else:
  697. #fields[hdrmulti] = [line]
  698. fields[hdrmulti] = array.array("c", line)
  699. continue
  700. hdr, arg = result.groups()
  701. ghdr = "*not valid*"
  702. result = fieldnamere.search(hdr)
  703. if result != None:
  704. ghdr = result.groups()[0]
  705. if ghdr in fieldnames:
  706. if multilinefields.has_key(ghdr):
  707. hdrmulti = ghdr
  708. #fields[ghdr] = [""]
  709. fields[ghdr] = array.array("c")
  710. else:
  711. hdrmulti = ""
  712. #fields[ghdr] = [arg]
  713. fields[ghdr] = array.array("c", arg)
  714. elif hdrmulti != "":
  715. #fields[hdrmulti].append(line)
  716. fields[hdrmulti].fromstring(line)
  717. if hdrmulti == "envelope" and \
  718. (hdr == "Reply-To" or hdr == "From" \
  719. or hdr == "X-GNATS-Notify"):
  720. arg = fix_email_addrs(arg)
  721. #fields[hdr] = [arg]
  722. fields[hdr] = array.array("c", arg)
  723. if fields.has_key("Reply-To") and len(fields["Reply-To"]) > 0:
  724. fields["Reply-To"] = fields["Reply-To"]
  725. else:
  726. fields["Reply-To"] = fields["From"]
  727. if fields.has_key("From"):
  728. del fields["From"]
  729. if fields.has_key("X-GNATS-Notify") == 0:
  730. fields["X-GNATS-Notify"] = array.array("c")
  731. #fields["X-GNATS-Notify"] = ""
  732. for x in fields.keys():
  733. fields[x] = fields[x].tostring()
  734. #fields[x] = "".join(fields[x])
  735. for x in fields.keys():
  736. if multilinefields.has_key(x):
  737. fields[x] = fields[x].rstrip()
  738. return fields
  739. parse_pr = staticmethod(parse_pr)
  740. load_index("%s/gnats-adm/index" % gnats_db_dir)
  741. load_categories("%s/gnats-adm/categories" % gnats_db_dir)
  742. load_responsible("%s/gnats-adm/responsible" % gnats_db_dir)
  743. get_userid(gnats_username)
  744. get_userid(unassigned_username)
  745. for x in pr_list:
  746. print "Processing %s..." % x
  747. a = GNATSbug ("%s/%s" % (gnats_db_dir, x))
  748. b = Bugzillabug(a)
  749. write_non_bug_tables()
  750. outfile.close()