collectstats.pl 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. #!/usr/bin/env perl -w
  2. # -*- Mode: perl; indent-tabs-mode: nil -*-
  3. #
  4. # The contents of this file are subject to the Mozilla Public
  5. # License Version 1.1 (the "License"); you may not use this file
  6. # except in compliance with the License. You may obtain a copy of
  7. # the License at http://www.mozilla.org/MPL/
  8. #
  9. # Software distributed under the License is distributed on an "AS
  10. # IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  11. # implied. See the License for the specific language governing
  12. # rights and limitations under the License.
  13. #
  14. # The Original Code is the Bugzilla Bug Tracking System.
  15. #
  16. # The Initial Developer of the Original Code is Netscape Communications
  17. # Corporation. Portions created by Netscape are
  18. # Copyright (C) 1998 Netscape Communications Corporation. All
  19. # Rights Reserved.
  20. #
  21. # Contributor(s): Terry Weissman <terry@mozilla.org>,
  22. # Harrison Page <harrison@netscape.com>
  23. # Gervase Markham <gerv@gerv.net>
  24. # Richard Walters <rwalters@qualcomm.com>
  25. # Jean-Sebastien Guay <jean_seb@hybride.com>
  26. # Frédéric Buclin <LpSolit@gmail.com>
  27. # Run me out of cron at midnight to collect Bugzilla statistics.
  28. #
  29. # To run new charts for a specific date, pass it in on the command line in
  30. # ISO (2004-08-14) format.
  31. use AnyDBM_File;
  32. use strict;
  33. use IO::Handle;
  34. use Cwd;
  35. use lib qw(. lib);
  36. use Bugzilla;
  37. use Bugzilla::Constants;
  38. use Bugzilla::Error;
  39. use Bugzilla::Util;
  40. use Bugzilla::Search;
  41. use Bugzilla::User;
  42. use Bugzilla::Product;
  43. use Bugzilla::Field;
  44. # Turn off output buffering (probably needed when displaying output feedback
  45. # in the regenerate mode).
  46. $| = 1;
  47. # Tidy up after graphing module
  48. my $cwd = Cwd::getcwd();
  49. if (chdir("graphs")) {
  50. unlink <./*.gif>;
  51. unlink <./*.png>;
  52. # chdir("..") doesn't work if graphs is a symlink, see bug 429378
  53. chdir($cwd);
  54. }
  55. # This is a pure command line script.
  56. Bugzilla->usage_mode(USAGE_MODE_CMDLINE);
  57. my $dbh = Bugzilla->switch_to_shadow_db();
  58. # To recreate the daily statistics, run "collectstats.pl --regenerate" .
  59. my $regenerate = 0;
  60. if ($#ARGV >= 0 && $ARGV[0] eq "--regenerate") {
  61. shift(@ARGV);
  62. $regenerate = 1;
  63. }
  64. my $datadir = bz_locations()->{'datadir'};
  65. my @myproducts = map {$_->name} Bugzilla::Product->get_all;
  66. unshift(@myproducts, "-All-");
  67. # As we can now customize statuses and resolutions, looking at the current list
  68. # of legal values only is not enough as some now removed statuses and resolutions
  69. # may have existed in the past, or have been renamed. We want them all.
  70. my $fields = {};
  71. foreach my $field ('bug_status', 'resolution') {
  72. my $values = get_legal_field_values($field);
  73. my $old_values = $dbh->selectcol_arrayref(
  74. "SELECT bugs_activity.added
  75. FROM bugs_activity
  76. INNER JOIN fielddefs
  77. ON fielddefs.id = bugs_activity.fieldid
  78. LEFT JOIN $field
  79. ON $field.value = bugs_activity.added
  80. WHERE fielddefs.name = ?
  81. AND $field.id IS NULL
  82. UNION
  83. SELECT bugs_activity.removed
  84. FROM bugs_activity
  85. INNER JOIN fielddefs
  86. ON fielddefs.id = bugs_activity.fieldid
  87. LEFT JOIN $field
  88. ON $field.value = bugs_activity.removed
  89. WHERE fielddefs.name = ?
  90. AND $field.id IS NULL",
  91. undef, ($field, $field));
  92. push(@$values, @$old_values);
  93. $fields->{$field} = $values;
  94. }
  95. my @statuses = @{$fields->{'bug_status'}};
  96. my @resolutions = @{$fields->{'resolution'}};
  97. # Exclude "" from the resolution list.
  98. @resolutions = grep {$_} @resolutions;
  99. my $tstart = time;
  100. foreach (@myproducts) {
  101. my $dir = "$datadir/mining";
  102. &check_data_dir ($dir);
  103. if ($regenerate) {
  104. &regenerate_stats($dir, $_);
  105. } else {
  106. &collect_stats($dir, $_);
  107. }
  108. }
  109. my $tend = time;
  110. # Uncomment the following line for performance testing.
  111. #print "Total time taken " . delta_time($tstart, $tend) . "\n";
  112. &calculate_dupes();
  113. CollectSeriesData();
  114. {
  115. local $ENV{'GATEWAY_INTERFACE'} = 'cmdline';
  116. local $ENV{'REQUEST_METHOD'} = 'GET';
  117. local $ENV{'QUERY_STRING'} = 'ctype=rdf';
  118. my $perl = $^X;
  119. trick_taint($perl);
  120. # Generate a static RDF file containing the default view of the duplicates data.
  121. open(CGI, "$perl -T duplicates.cgi |")
  122. || die "can't fork duplicates.cgi: $!";
  123. open(RDF, ">$datadir/duplicates.tmp")
  124. || die "can't write to $datadir/duplicates.tmp: $!";
  125. my $headers_done = 0;
  126. while (<CGI>) {
  127. print RDF if $headers_done;
  128. $headers_done = 1 if $_ eq "\r\n";
  129. }
  130. close CGI;
  131. close RDF;
  132. }
  133. if (-s "$datadir/duplicates.tmp") {
  134. rename("$datadir/duplicates.rdf", "$datadir/duplicates-old.rdf");
  135. rename("$datadir/duplicates.tmp", "$datadir/duplicates.rdf");
  136. }
  137. sub check_data_dir {
  138. my $dir = shift;
  139. if (! -d $dir) {
  140. mkdir $dir, 0755;
  141. chmod 0755, $dir;
  142. }
  143. }
  144. sub collect_stats {
  145. my $dir = shift;
  146. my $product = shift;
  147. my $when = localtime (time);
  148. my $dbh = Bugzilla->dbh;
  149. my $product_id;
  150. if ($product ne '-All-') {
  151. my $prod = Bugzilla::Product::check_product($product);
  152. $product_id = $prod->id;
  153. }
  154. # NB: Need to mangle the product for the filename, but use the real
  155. # product name in the query
  156. my $file_product = $product;
  157. $file_product =~ s/\//-/gs;
  158. my $file = join '/', $dir, $file_product;
  159. my $exists = -f $file;
  160. # if the file exists, get the old status and resolution list for that product.
  161. my @data;
  162. @data = get_old_data($file) if $exists;
  163. # If @data is not empty, then we have to recreate the data file.
  164. if (scalar(@data)) {
  165. open(DATA, '>', $file)
  166. || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
  167. }
  168. else {
  169. open(DATA, '>>', $file)
  170. || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
  171. }
  172. # Now collect current data.
  173. my @row = (today());
  174. my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?};
  175. my $reso_sql = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?};
  176. if ($product ne '-All-') {
  177. $status_sql .= q{ AND product_id = ?};
  178. $reso_sql .= q{ AND product_id = ?};
  179. }
  180. my $sth_status = $dbh->prepare($status_sql);
  181. my $sth_reso = $dbh->prepare($reso_sql);
  182. my @values ;
  183. foreach my $status (@statuses) {
  184. @values = ($status);
  185. push (@values, $product_id) if ($product ne '-All-');
  186. my $count = $dbh->selectrow_array($sth_status, undef, @values);
  187. push(@row, $count);
  188. }
  189. foreach my $resolution (@resolutions) {
  190. @values = ($resolution);
  191. push (@values, $product_id) if ($product ne '-All-');
  192. my $count = $dbh->selectrow_array($sth_reso, undef, @values);
  193. push(@row, $count);
  194. }
  195. if (!$exists || scalar(@data)) {
  196. my $fields = join('|', ('DATE', @statuses, @resolutions));
  197. print DATA <<FIN;
  198. # Bugzilla Daily Bug Stats
  199. #
  200. # Do not edit me! This file is generated.
  201. #
  202. # fields: $fields
  203. # Product: $product
  204. # Created: $when
  205. FIN
  206. }
  207. # Add existing data, if needed. Note that no count is not treated
  208. # the same way as a count with 0 bug.
  209. foreach my $data (@data) {
  210. print DATA join('|', map {defined $data->{$_} ? $data->{$_} : ''}
  211. ('DATE', @statuses, @resolutions)) . "\n";
  212. }
  213. print DATA (join '|', @row) . "\n";
  214. close DATA;
  215. chmod 0644, $file;
  216. }
  217. sub get_old_data {
  218. my $file = shift;
  219. open(DATA, '<', $file)
  220. || ThrowCodeError('chart_file_open_fail', {'filename' => $file});
  221. my @data;
  222. my @columns;
  223. my $recreate = 0;
  224. while (<DATA>) {
  225. chomp;
  226. next unless $_;
  227. if (/^# fields?:\s*(.+)\s*$/) {
  228. @columns = split(/\|/, $1);
  229. # Compare this list with @statuses and @resolutions.
  230. # If they are identical, then we can safely append new data
  231. # to the end of the file; else we have to recreate it.
  232. $recreate = 1;
  233. my @new_cols = ($columns[0], @statuses, @resolutions);
  234. if (scalar(@columns) == scalar(@new_cols)) {
  235. my $identical = 1;
  236. for (0 .. $#columns) {
  237. $identical = 0 if ($columns[$_] ne $new_cols[$_]);
  238. }
  239. last if $identical;
  240. }
  241. }
  242. next unless $recreate;
  243. next if (/^#/); # Ignore comments.
  244. # If we have to recreate the file, we have to load all existing
  245. # data first.
  246. my @line = split /\|/;
  247. my %data;
  248. foreach my $column (@columns) {
  249. $data{$column} = shift @line;
  250. }
  251. push(@data, \%data);
  252. }
  253. close(DATA);
  254. return @data;
  255. }
  256. sub calculate_dupes {
  257. my $dbh = Bugzilla->dbh;
  258. my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");
  259. my %dupes;
  260. my %count;
  261. my $key;
  262. my $changed = 1;
  263. my $today = &today_dash;
  264. # Save % count here in a date-named file
  265. # so we can read it back in to do changed counters
  266. # First, delete it if it exists, so we don't add to the contents of an old file
  267. my $datadir = bz_locations()->{'datadir'};
  268. if (my @files = <$datadir/duplicates/dupes$today*>) {
  269. map { trick_taint($_) } @files;
  270. unlink @files;
  271. }
  272. dbmopen(%count, "$datadir/duplicates/dupes$today", 0644) || die "Can't open DBM dupes file: $!";
  273. # Create a hash with key "a bug number", value "bug which that bug is a
  274. # direct dupe of" - straight from the duplicates table.
  275. foreach my $row (@$rows) {
  276. my ($dupe_of, $dupe) = @$row;
  277. $dupes{$dupe} = $dupe_of;
  278. }
  279. # Total up the number of bugs which are dupes of a given bug
  280. # count will then have key = "bug number",
  281. # value = "number of immediate dupes of that bug".
  282. foreach $key (keys(%dupes))
  283. {
  284. my $dupe_of = $dupes{$key};
  285. if (!defined($count{$dupe_of})) {
  286. $count{$dupe_of} = 0;
  287. }
  288. $count{$dupe_of}++;
  289. }
  290. # Now we collapse the dupe tree by iterating over %count until
  291. # there is no further change.
  292. while ($changed == 1)
  293. {
  294. $changed = 0;
  295. foreach $key (keys(%count)) {
  296. # if this bug is actually itself a dupe, and has a count...
  297. if (defined($dupes{$key}) && $count{$key} > 0) {
  298. # add that count onto the bug it is a dupe of,
  299. # and zero the count; the check is to avoid
  300. # loops
  301. if ($count{$dupes{$key}} != 0) {
  302. $count{$dupes{$key}} += $count{$key};
  303. $count{$key} = 0;
  304. $changed = 1;
  305. }
  306. }
  307. }
  308. }
  309. # Remove the values for which the count is zero
  310. foreach $key (keys(%count))
  311. {
  312. if ($count{$key} == 0) {
  313. delete $count{$key};
  314. }
  315. }
  316. dbmclose(%count);
  317. }
  318. # This regenerates all statistics from the database.
  319. sub regenerate_stats {
  320. my $dir = shift;
  321. my $product = shift;
  322. my $dbh = Bugzilla->dbh;
  323. my $when = localtime(time());
  324. my $tstart = time();
  325. # NB: Need to mangle the product for the filename, but use the real
  326. # product name in the query
  327. my $file_product = $product;
  328. $file_product =~ s/\//-/gs;
  329. my $file = join '/', $dir, $file_product;
  330. my @bugs;
  331. my $and_product = "";
  332. my $from_product = "";
  333. my @values = ();
  334. if ($product ne '-All-') {
  335. $and_product = q{ AND products.name = ?};
  336. $from_product = q{ INNER JOIN products
  337. ON bugs.product_id = products.id};
  338. push (@values, $product);
  339. }
  340. # Determine the start date from the date the first bug in the
  341. # database was created, and the end date from the current day.
  342. # If there were no bugs in the search, return early.
  343. my $query = q{SELECT } .
  344. $dbh->sql_to_days('creation_ts') . q{ AS start_day, } .
  345. $dbh->sql_to_days('current_date') . q{ AS end_day, } .
  346. $dbh->sql_to_days("'1970-01-01'") .
  347. qq{ FROM bugs $from_product
  348. WHERE } . $dbh->sql_to_days('creation_ts') .
  349. qq{ IS NOT NULL $and_product
  350. ORDER BY start_day } . $dbh->sql_limit(1);
  351. my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values);
  352. if (!defined $start) {
  353. return;
  354. }
  355. if (open DATA, ">$file") {
  356. DATA->autoflush(1);
  357. my $fields = join('|', ('DATE', @statuses, @resolutions));
  358. print DATA <<FIN;
  359. # Bugzilla Daily Bug Stats
  360. #
  361. # Do not edit me! This file is generated.
  362. #
  363. # fields: $fields
  364. # Product: $product
  365. # Created: $when
  366. FIN
  367. # For each day, generate a line of statistics.
  368. my $total_days = $end - $start;
  369. for (my $day = $start + 1; $day <= $end; $day++) {
  370. # Some output feedback
  371. my $percent_done = ($day - $start - 1) * 100 / $total_days;
  372. printf "\rRegenerating $product \[\%.1f\%\%]", $percent_done;
  373. # Get a list of bugs that were created the previous day, and
  374. # add those bugs to the list of bugs for this product.
  375. $query = qq{SELECT bug_id
  376. FROM bugs $from_product
  377. WHERE bugs.creation_ts < } .
  378. $dbh->sql_from_days($day - 1) .
  379. q{ AND bugs.creation_ts >= } .
  380. $dbh->sql_from_days($day - 2) .
  381. $and_product . q{ ORDER BY bug_id};
  382. my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
  383. push(@bugs, @$bug_ids);
  384. # For each bug that existed on that day, determine its status
  385. # at the beginning of the day. If there were no status
  386. # changes on or after that day, the status was the same as it
  387. # is today, which can be found in the bugs table. Otherwise,
  388. # the status was equal to the first "previous value" entry in
  389. # the bugs_activity table for that bug made on or after that
  390. # day.
  391. my %bugcount;
  392. foreach (@statuses) { $bugcount{$_} = 0; }
  393. foreach (@resolutions) { $bugcount{$_} = 0; }
  394. # Get information on bug states and resolutions.
  395. $query = qq{SELECT bugs_activity.removed
  396. FROM bugs_activity
  397. INNER JOIN fielddefs
  398. ON bugs_activity.fieldid = fielddefs.id
  399. WHERE fielddefs.name = ?
  400. AND bugs_activity.bug_id = ?
  401. AND bugs_activity.bug_when >= } .
  402. $dbh->sql_from_days($day) .
  403. " ORDER BY bugs_activity.bug_when " .
  404. $dbh->sql_limit(1);
  405. my $sth_bug = $dbh->prepare($query);
  406. my $sth_status = $dbh->prepare(q{SELECT bug_status
  407. FROM bugs
  408. WHERE bug_id = ?});
  409. my $sth_reso = $dbh->prepare(q{SELECT resolution
  410. FROM bugs
  411. WHERE bug_id = ?});
  412. for my $bug (@bugs) {
  413. my $status = $dbh->selectrow_array($sth_bug, undef,
  414. 'bug_status', $bug);
  415. unless ($status) {
  416. $status = $dbh->selectrow_array($sth_status, undef, $bug);
  417. }
  418. if (defined $bugcount{$status}) {
  419. $bugcount{$status}++;
  420. }
  421. my $resolution = $dbh->selectrow_array($sth_bug, undef,
  422. 'resolution', $bug);
  423. unless ($resolution) {
  424. $resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
  425. }
  426. if (defined $bugcount{$resolution}) {
  427. $bugcount{$resolution}++;
  428. }
  429. }
  430. # Generate a line of output containing the date and counts
  431. # of bugs in each state.
  432. my $date = sqlday($day, $base);
  433. print DATA "$date";
  434. foreach (@statuses) { print DATA "|$bugcount{$_}"; }
  435. foreach (@resolutions) { print DATA "|$bugcount{$_}"; }
  436. print DATA "\n";
  437. }
  438. # Finish up output feedback for this product.
  439. my $tend = time;
  440. print "\rRegenerating $product \[100.0\%] - " .
  441. delta_time($tstart, $tend) . "\n";
  442. close DATA;
  443. chmod 0640, $file;
  444. }
  445. }
  446. sub today {
  447. my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
  448. return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
  449. }
  450. sub today_dash {
  451. my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
  452. return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
  453. }
  454. sub sqlday {
  455. my ($day, $base) = @_;
  456. $day = ($day - $base) * 86400;
  457. my ($dom, $mon, $year) = (gmtime($day))[3, 4, 5];
  458. return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
  459. }
  460. sub delta_time {
  461. my $tstart = shift;
  462. my $tend = shift;
  463. my $delta = $tend - $tstart;
  464. my $hours = int($delta/3600);
  465. my $minutes = int($delta/60) - ($hours * 60);
  466. my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
  467. return sprintf("%02d:%02d:%02d" , $hours, $minutes, $seconds);
  468. }
  469. sub CollectSeriesData {
  470. # We need some way of randomising the distribution of series, such that
  471. # all of the series which are to be run every 7 days don't run on the same
  472. # day. This is because this might put the server under severe load if a
  473. # particular frequency, such as once a week, is very common. We achieve
  474. # this by only running queries when:
  475. # (days_since_epoch + series_id) % frequency = 0. So they'll run every
  476. # <frequency> days, but the start date depends on the series_id.
  477. my $days_since_epoch = int(time() / (60 * 60 * 24));
  478. my $today = $ARGV[0] || today_dash();
  479. # We save a copy of the main $dbh and then switch to the shadow and get
  480. # that one too. Remember, these may be the same.
  481. my $dbh = Bugzilla->switch_to_main_db();
  482. my $shadow_dbh = Bugzilla->switch_to_shadow_db();
  483. my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
  484. "FROM series " .
  485. "WHERE frequency != 0 AND " .
  486. "MOD(($days_since_epoch + series_id), frequency) = 0",
  487. "series_id");
  488. # We prepare the insertion into the data table, for efficiency.
  489. my $sth = $dbh->prepare("INSERT INTO series_data " .
  490. "(series_id, series_date, series_value) " .
  491. "VALUES (?, " . $dbh->quote($today) . ", ?)");
  492. # We delete from the table beforehand, to avoid SQL errors if people run
  493. # collectstats.pl twice on the same day.
  494. my $deletesth = $dbh->prepare("DELETE FROM series_data
  495. WHERE series_id = ? AND series_date = " .
  496. $dbh->quote($today));
  497. foreach my $series_id (keys %$serieses) {
  498. # We set up the user for Search.pm's permission checking - each series
  499. # runs with the permissions of its creator.
  500. my $user = new Bugzilla::User($serieses->{$series_id}->{'creator'});
  501. my $cgi = new Bugzilla::CGI($serieses->{$series_id}->{'query'});
  502. my $data;
  503. # Do not die if Search->new() detects invalid data, such as an obsolete
  504. # login name or a renamed product or component, etc.
  505. eval {
  506. my $search = new Bugzilla::Search('params' => $cgi,
  507. 'fields' => ["bugs.bug_id"],
  508. 'user' => $user);
  509. my $sql = $search->getSQL();
  510. $data = $shadow_dbh->selectall_arrayref($sql);
  511. };
  512. if (!$@) {
  513. # We need to count the returned rows. Without subselects, we can't
  514. # do this directly in the SQL for all queries. So we do it by hand.
  515. my $count = scalar(@$data) || 0;
  516. $deletesth->execute($series_id);
  517. $sth->execute($series_id, $count);
  518. }
  519. }
  520. }