#!/usr/bin/env perl -w
# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is the Bugzilla Bug Tracking System.
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are
# Copyright (C) 1998 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s): Terry Weissman <terry@mozilla.org>,
#                 Harrison Page <harrison@netscape.com>
#                 Gervase Markham <gerv@gerv.net>
#                 Richard Walters <rwalters@qualcomm.com>
#                 Jean-Sebastien Guay <jean_seb@hybride.com>
#                 Frédéric Buclin <LpSolit@gmail.com>
#
# Run me out of cron at midnight to collect Bugzilla statistics.
#
# To run new charts for a specific date, pass it in on the command line in
# ISO (2004-08-14) format.
use strict;
use warnings;

use AnyDBM_File;
use Cwd;
use IO::Handle;

use lib qw(. lib);

use Bugzilla;
use Bugzilla::Constants;
use Bugzilla::Error;
use Bugzilla::Util;
use Bugzilla::Search;
use Bugzilla::User;
use Bugzilla::Product;
use Bugzilla::Field;
# Turn off output buffering (probably needed when displaying output feedback
# in the regenerate mode).
$| = 1;

# Tidy up after graphing module: remove the images left in the graphs
# directory, then return to the directory we started from.
my $cwd = Cwd::getcwd();
if (chdir("graphs")) {
    unlink glob("./*.gif");
    unlink glob("./*.png");
    # chdir("..") doesn't work if graphs is a symlink, see bug 429378
    chdir($cwd);
}

# This is a pure command line script.
Bugzilla->usage_mode(USAGE_MODE_CMDLINE);

my $dbh = Bugzilla->switch_to_shadow_db();

# To recreate the daily statistics, run "collectstats.pl --regenerate" .
# The flag is removed from @ARGV so that a date argument, if any, ends up
# in $ARGV[0].
my $regenerate = 0;
if (@ARGV && $ARGV[0] eq "--regenerate") {
    shift(@ARGV);
    $regenerate = 1;
}

my $datadir = bz_locations()->{'datadir'};

# One data file is kept per product, plus one for the "-All-" pseudo-product.
my @myproducts = map { $_->name } Bugzilla::Product->get_all;
unshift(@myproducts, "-All-");
# As we can now customize statuses and resolutions, looking at the current list
# of legal values only is not enough as some now removed statuses and resolutions
# may have existed in the past, or have been renamed. We want them all.
my $fields = {};
foreach my $field ('bug_status', 'resolution') {
    my $values = get_legal_field_values($field);

    # Values recorded in bugs_activity (as either the added or the removed
    # value) that have no matching row in the field's own table.
    my $old_values = $dbh->selectcol_arrayref(
        "SELECT bugs_activity.added
           FROM bugs_activity
     INNER JOIN fielddefs
             ON fielddefs.id = bugs_activity.fieldid
      LEFT JOIN $field
             ON $field.value = bugs_activity.added
          WHERE fielddefs.name = ?
            AND $field.id IS NULL

          UNION

         SELECT bugs_activity.removed
           FROM bugs_activity
     INNER JOIN fielddefs
             ON fielddefs.id = bugs_activity.fieldid
      LEFT JOIN $field
             ON $field.value = bugs_activity.removed
          WHERE fielddefs.name = ?
            AND $field.id IS NULL",
        undef, ($field, $field));

    push(@$values, @$old_values);
    $fields->{$field} = $values;
}

my @statuses = @{$fields->{'bug_status'}};
my @resolutions = @{$fields->{'resolution'}};
# Exclude "" from the resolution list.
@resolutions = grep {$_} @resolutions;
my $tstart = time;

# All per-product data files live in the same directory, so make sure it
# exists once, before the loop.
my $mining_dir = "$datadir/mining";
check_data_dir($mining_dir);

# A named loop variable is used on purpose: the subs called here read files
# line by line, and a foreach over the global $_ would let them overwrite
# the elements of @myproducts through the alias.
foreach my $product (@myproducts) {
    if ($regenerate) {
        regenerate_stats($mining_dir, $product);
    }
    else {
        collect_stats($mining_dir, $product);
    }
}
my $tend = time;
# Uncomment the following line for performance testing.
#print "Total time taken " . delta_time($tstart, $tend) . "\n";

calculate_dupes();

CollectSeriesData();
{
    # Make duplicates.cgi behave as if it had been asked for ?ctype=rdf.
    local $ENV{'GATEWAY_INTERFACE'} = 'cmdline';
    local $ENV{'REQUEST_METHOD'} = 'GET';
    local $ENV{'QUERY_STRING'} = 'ctype=rdf';

    my $perl = $^X;
    trick_taint($perl);

    # Generate a static RDF file containing the default view of the duplicates data.
    open(my $cgi_fh, '-|', "$perl -T duplicates.cgi")
        || die "can't fork duplicates.cgi: $!";
    open(my $rdf_fh, '>', "$datadir/duplicates.tmp")
        || die "can't write to $datadir/duplicates.tmp: $!";

    # Copy everything that follows the blank line ending the HTTP headers.
    my $headers_done = 0;
    while (my $line = <$cgi_fh>) {
        print {$rdf_fh} $line if $headers_done;
        $headers_done = 1 if $line eq "\r\n";
    }
    close($cgi_fh);
    # Buffered write errors only show up here; stop before a truncated file
    # can replace the previous duplicates.rdf below.
    close($rdf_fh)
        || die "can't write to $datadir/duplicates.tmp: $!";
}

# Only rotate the files when something was actually generated.
if (-s "$datadir/duplicates.tmp") {
    rename("$datadir/duplicates.rdf", "$datadir/duplicates-old.rdf");
    rename("$datadir/duplicates.tmp", "$datadir/duplicates.rdf");
}
# Make sure a directory exists, creating it with mode 0755 when it is missing.
#
# Params:  $dir - path of the directory to check.
# Returns: nothing useful; a warning is printed when the directory is missing
#          and cannot be created.
sub check_data_dir {
    my ($dir) = @_;
    return if -d $dir;

    # The second test covers the case where mkdir failed only because the
    # directory appeared in the meantime.
    if (!mkdir($dir, 0755) && !-d $dir) {
        warn "Cannot create directory $dir: $!\n";
        return;
    }

    # mkdir's mode is filtered through the umask, so set it explicitly.
    chmod(0755, $dir);
    return;
}
# Add today's bug counts for one product to its data file.
#
# Params:  $dir     - directory holding the per-product data files.
#          $product - product name, or '-All-' to count bugs in all products.
#
# A row made of today's date followed by the number of bugs in each status
# and each resolution is written to "$dir/<product>". When get_old_data()
# returns rows (the column list stored in the file differs from the current
# one), the file is rewritten with a fresh header and those rows; otherwise
# the new row is appended. An open failure throws 'chart_file_open_fail'.
sub collect_stats {
    my ($dir, $product) = @_;
    my $when = localtime(time);
    my $dbh = Bugzilla->dbh;

    my $product_id;
    if ($product ne '-All-') {
        my $prod = Bugzilla::Product::check_product($product);
        $product_id = $prod->id;
    }

    # NB: Need to mangle the product for the filename, but use the real
    # product name in the query
    my $file_product = $product;
    $file_product =~ s/\//-/gs;
    my $file = join '/', $dir, $file_product;
    my $exists = -f $file;

    # if the file exists, get the old status and resolution list for that product.
    my @data;
    @data = get_old_data($file) if $exists;

    # Now collect current data. This is done before the file is opened, so
    # that a failing query cannot leave behind a file that has already been
    # truncated for re-creation.
    my @row = (today());
    my $status_sql = q{SELECT COUNT(*) FROM bugs WHERE bug_status = ?};
    my $reso_sql = q{SELECT COUNT(*) FROM bugs WHERE resolution = ?};

    my @product_args;
    if ($product ne '-All-') {
        $status_sql .= q{ AND product_id = ?};
        $reso_sql .= q{ AND product_id = ?};
        @product_args = ($product_id);
    }

    my $sth_status = $dbh->prepare($status_sql);
    my $sth_reso = $dbh->prepare($reso_sql);

    foreach my $status (@statuses) {
        my $count = $dbh->selectrow_array($sth_status, undef,
                                          $status, @product_args);
        push(@row, $count);
    }
    foreach my $resolution (@resolutions) {
        my $count = $dbh->selectrow_array($sth_reso, undef,
                                          $resolution, @product_args);
        push(@row, $count);
    }

    # If @data is not empty, then we have to recreate the data file;
    # otherwise the new row is simply appended.
    my $mode = scalar(@data) ? '>' : '>>';
    open(my $fh, $mode, $file)
        || ThrowCodeError('chart_file_open_fail', {'filename' => $file});

    # A new or re-created file needs its header.
    if (!$exists || scalar(@data)) {
        my $fields = join('|', ('DATE', @statuses, @resolutions));
        print {$fh} <<FIN;
# Bugzilla Daily Bug Stats
#
# Do not edit me! This file is generated.
#
# fields: $fields
# Product: $product
# Created: $when
FIN
    }

    # Add existing data, if needed. Note that no count is not treated
    # the same way as a count with 0 bug.
    foreach my $data (@data) {
        print {$fh} join('|', map {defined $data->{$_} ? $data->{$_} : ''}
                                  ('DATE', @statuses, @resolutions)) . "\n";
    }
    print {$fh} (join '|', @row) . "\n";
    close($fh) || warn "Cannot finish writing $file: $!\n";
    chmod 0644, $file;
}
# Load the rows of an existing data file when its column list is out of date.
#
# Params:  $file - path of the data file to read.
# Returns: a list of hashrefs, one per data row, keyed by the column names
#          found on the file's "# fields:" line. The list is empty when that
#          line matches the current statuses and resolutions, or when no such
#          line is found.
# An open failure throws 'chart_file_open_fail'.
sub get_old_data {
    my $file = shift;

    open(my $fh, '<', $file)
        || ThrowCodeError('chart_file_open_fail', {'filename' => $file});

    my @data;
    my @columns;
    my $recreate = 0;
    # The line is read into a lexical, not the global $_, so a caller that is
    # iterating with $_ does not get its value overwritten.
    while (my $line = <$fh>) {
        chomp($line);
        next unless $line;
        if ($line =~ /^# fields?:\s*(.+)\s*$/) {
            @columns = split(/\|/, $1);
            # Compare this list with @statuses and @resolutions.
            # If they are identical, then we can safely append new data
            # to the end of the file; else we have to recreate it.
            $recreate = 1;
            my @new_cols = ($columns[0], @statuses, @resolutions);
            if (scalar(@columns) == scalar(@new_cols)) {
                my $identical = 1;
                for my $i (0 .. $#columns) {
                    $identical = 0 if ($columns[$i] ne $new_cols[$i]);
                }
                last if $identical;
            }
        }
        next unless $recreate;
        next if ($line =~ /^#/); # Ignore comments.
        # If we have to recreate the file, we have to load all existing
        # data first. Columns without a value on this row are left undefined.
        my @values = split(/\|/, $line);
        my %row;
        @row{@columns} = @values[0 .. $#columns];
        push(@data, \%row);
    }
    close($fh);
    return @data;
}
# Count the duplicates of every bug and store the totals in today's DBM file.
#
# Reads the duplicates table, folds the counts of bugs that are themselves
# duplicates into the bug they are a duplicate of, and writes the non-zero
# totals to "<datadir>/duplicates/dupes<YYYY-MM-DD>". Dies if that DBM file
# cannot be opened.
sub calculate_dupes {
    my $dbh = Bugzilla->dbh;
    my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");

    my %dupes;
    my %count;
    my $today = today_dash();

    # Save % count here in a date-named file
    # so we can read it back in to do changed counters
    # First, delete it if it exists, so we don't add to the contents of an old file
    my $datadir = bz_locations()->{'datadir'};
    if (my @files = glob("$datadir/duplicates/dupes$today*")) {
        trick_taint($_) foreach @files;
        unlink @files;
    }

    dbmopen(%count, "$datadir/duplicates/dupes$today", 0644)
        || die "Can't open DBM dupes file: $!";

    # Create a hash with key "a bug number", value "bug which that bug is a
    # direct dupe of" - straight from the duplicates table.
    foreach my $row (@$rows) {
        my ($dupe_of, $dupe) = @$row;
        $dupes{$dupe} = $dupe_of;
    }

    # Total up the number of bugs which are dupes of a given bug
    # count will then have key = "bug number",
    # value = "number of immediate dupes of that bug".
    foreach my $dupe (keys(%dupes)) {
        my $dupe_of = $dupes{$dupe};
        if (!defined($count{$dupe_of})) {
            $count{$dupe_of} = 0;
        }
        $count{$dupe_of}++;
    }

    # Now we collapse the dupe tree by iterating over %count until
    # there is no further change.
    my $changed = 1;
    while ($changed == 1) {
        $changed = 0;
        foreach my $bug (keys(%count)) {
            # if this bug is actually itself a dupe, and has a count...
            if (defined($dupes{$bug}) && $count{$bug} > 0) {
                # add that count onto the bug it is a dupe of,
                # and zero the count; the check is to avoid
                # loops
                if ($count{$dupes{$bug}} != 0) {
                    $count{$dupes{$bug}} += $count{$bug};
                    $count{$bug} = 0;
                    $changed = 1;
                }
            }
        }
    }

    # Remove the values for which the count is zero
    foreach my $bug (keys(%count)) {
        if ($count{$bug} == 0) {
            delete $count{$bug};
        }
    }

    dbmclose(%count);
}
# This regenerates all statistics from the database.
#
# Params:  $dir     - directory holding the per-product data files.
#          $product - product name, or '-All-' to cover all products.
#
# The data file "$dir/<product>" is rewritten from scratch with one row per
# day, from the day after the first matching bug was created up to the
# current day. Progress is printed to standard output. Nothing is written
# when no bug matches; a warning is printed when the file cannot be opened.
sub regenerate_stats {
    my ($dir, $product) = @_;

    my $dbh = Bugzilla->dbh;
    my $when = localtime(time());
    my $tstart = time();

    # NB: Need to mangle the product for the filename, but use the real
    # product name in the query
    my $file_product = $product;
    $file_product =~ s/\//-/gs;
    my $file = join '/', $dir, $file_product;

    my @bugs;

    my $and_product = "";
    my $from_product = "";

    my @values = ();
    if ($product ne '-All-') {
        $and_product = q{ AND products.name = ?};
        $from_product = q{ INNER JOIN products
                          ON bugs.product_id = products.id};
        push (@values, $product);
    }

    # Determine the start date from the date the first bug in the
    # database was created, and the end date from the current day.
    # If there were no bugs in the search, return early.
    my $query = q{SELECT } .
                $dbh->sql_to_days('creation_ts') . q{ AS start_day, } .
                $dbh->sql_to_days('current_date') . q{ AS end_day, } .
                $dbh->sql_to_days("'1970-01-01'") .
                qq{ FROM bugs $from_product
                   WHERE } . $dbh->sql_to_days('creation_ts') .
                qq{ IS NOT NULL $and_product
                ORDER BY start_day } . $dbh->sql_limit(1);
    my ($start, $end, $base) = $dbh->selectrow_array($query, undef, @values);

    if (!defined $start) {
        return;
    }

    my $fh;
    if (!open($fh, '>', $file)) {
        warn "Cannot write to $file: $!\n";
        return;
    }
    $fh->autoflush(1);

    my $fields = join('|', ('DATE', @statuses, @resolutions));
    print {$fh} <<FIN;
# Bugzilla Daily Bug Stats
#
# Do not edit me! This file is generated.
#
# fields: $fields
# Product: $product
# Created: $when
FIN

    # These two lookups of a bug's current state do not depend on the day,
    # so they are prepared once.
    my $sth_status = $dbh->prepare(q{SELECT bug_status
                                       FROM bugs
                                      WHERE bug_id = ?});

    my $sth_reso = $dbh->prepare(q{SELECT resolution
                                     FROM bugs
                                    WHERE bug_id = ?});

    # For each day, generate a line of statistics.
    my $total_days = $end - $start;
    for (my $day = $start + 1; $day <= $end; $day++) {
        # Some output feedback. The product name is passed as an argument
        # so that a '%' in it is not taken as a format directive.
        my $percent_done = ($day - $start - 1) * 100 / $total_days;
        printf "\rRegenerating %s [%.1f%%]", $product, $percent_done;

        # Get a list of bugs that were created the previous day, and
        # add those bugs to the list of bugs for this product.
        $query = qq{SELECT bug_id
                      FROM bugs $from_product
                     WHERE bugs.creation_ts < } .
                 $dbh->sql_from_days($day - 1) .
                 q{ AND bugs.creation_ts >= } .
                 $dbh->sql_from_days($day - 2) .
                 $and_product . q{ ORDER BY bug_id};

        my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
        push(@bugs, @$bug_ids);

        # For each bug that existed on that day, determine its status
        # at the beginning of the day. If there were no status
        # changes on or after that day, the status was the same as it
        # is today, which can be found in the bugs table. Otherwise,
        # the status was equal to the first "previous value" entry in
        # the bugs_activity table for that bug made on or after that
        # day.
        my %bugcount;
        foreach (@statuses) { $bugcount{$_} = 0; }
        foreach (@resolutions) { $bugcount{$_} = 0; }

        # Get information on bug states and resolutions.
        $query = qq{SELECT bugs_activity.removed
                      FROM bugs_activity
                INNER JOIN fielddefs
                        ON bugs_activity.fieldid = fielddefs.id
                     WHERE fielddefs.name = ?
                       AND bugs_activity.bug_id = ?
                       AND bugs_activity.bug_when >= } .
                 $dbh->sql_from_days($day) .
                 " ORDER BY bugs_activity.bug_when " .
                 $dbh->sql_limit(1);

        my $sth_bug = $dbh->prepare($query);

        for my $bug (@bugs) {
            my $status = $dbh->selectrow_array($sth_bug, undef,
                                               'bug_status', $bug);
            unless ($status) {
                $status = $dbh->selectrow_array($sth_status, undef, $bug);
            }

            if (defined $bugcount{$status}) {
                $bugcount{$status}++;
            }

            my $resolution = $dbh->selectrow_array($sth_bug, undef,
                                                   'resolution', $bug);
            unless ($resolution) {
                $resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
            }

            if (defined $bugcount{$resolution}) {
                $bugcount{$resolution}++;
            }
        }

        # Generate a line of output containing the date and counts
        # of bugs in each state.
        my $date = sqlday($day, $base);
        print {$fh} "$date";
        foreach (@statuses) { print {$fh} "|$bugcount{$_}"; }
        foreach (@resolutions) { print {$fh} "|$bugcount{$_}"; }
        print {$fh} "\n";
    }

    # Finish up output feedback for this product.
    my $tend = time;
    print "\rRegenerating $product \[100.0\%] - " .
        delta_time($tstart, $tend) . "\n";

    close($fh);
    chmod 0640, $file;
}
# Return the current local date as an eight-digit string, YYYYMMDD.
sub today {
    my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
    # localtime counts months from 0 and years from 1900.
    return sprintf("%04d%02d%02d", 1900 + $year, $mon + 1, $dom);
}
# Return the current local date in ISO form, YYYY-MM-DD.
sub today_dash {
    my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
    # localtime counts months from 0 and years from 1900.
    return sprintf("%04d-%02d-%02d", 1900 + $year, $mon + 1, $dom);
}
# Convert a day number into a YYYYMMDD string.
#
# Params:  $day  - day number to convert.
#          $base - day number, on the same scale, of 1970-01-01.
# Returns: the date, in UTC, that lies ($day - $base) days after 1970-01-01.
sub sqlday {
    my ($day, $base) = @_;
    my $epoch_seconds = ($day - $base) * 86400;
    my ($dom, $mon, $year) = (gmtime($epoch_seconds))[3, 4, 5];
    # gmtime counts months from 0 and years from 1900.
    return sprintf("%04d%02d%02d", 1900 + $year, $mon + 1, $dom);
}
# Format the time elapsed between two timestamps as HH:MM:SS.
#
# Params:  $tstart - start time, in seconds.
#          $tend   - end time, in seconds.
# Returns: the difference as a string; hours are not capped at 24.
sub delta_time {
    my ($tstart, $tend) = @_;
    my $delta = $tend - $tstart;
    my $hours = int($delta / 3600);
    my $minutes = int($delta / 60) - ($hours * 60);
    my $seconds = $delta - ($minutes * 60) - ($hours * 3600);
    return sprintf("%02d:%02d:%02d", $hours, $minutes, $seconds);
}
# Run every series that is due today and store its bug count in series_data.
#
# The date recorded is $ARGV[0] when one was given on the command line,
# otherwise today's date. A series whose search cannot be built or run is
# skipped.
sub CollectSeriesData {
    # We need some way of randomising the distribution of series, such that
    # all of the series which are to be run every 7 days don't run on the same
    # day. This is because this might put the server under severe load if a
    # particular frequency, such as once a week, is very common. We achieve
    # this by only running queries when:
    # (days_since_epoch + series_id) % frequency = 0. So they'll run every
    # <frequency> days, but the start date depends on the series_id.
    my $days_since_epoch = int(time() / (60 * 60 * 24));
    my $today = $ARGV[0] || today_dash();

    # We save a copy of the main $dbh and then switch to the shadow and get
    # that one too. Remember, these may be the same.
    my $dbh = Bugzilla->switch_to_main_db();
    my $shadow_dbh = Bugzilla->switch_to_shadow_db();

    my $serieses = $dbh->selectall_hashref("SELECT series_id, query, creator " .
                      "FROM series " .
                      "WHERE frequency != 0 AND " .
                      "MOD(($days_since_epoch + series_id), frequency) = 0",
                      "series_id");

    # We prepare the insertion into the data table, for efficiency.
    my $sth = $dbh->prepare("INSERT INTO series_data " .
                            "(series_id, series_date, series_value) " .
                            "VALUES (?, " . $dbh->quote($today) . ", ?)");

    # We delete from the table beforehand, to avoid SQL errors if people run
    # collectstats.pl twice on the same day.
    my $deletesth = $dbh->prepare("DELETE FROM series_data
                                   WHERE series_id = ? AND series_date = " .
                                   $dbh->quote($today));

    foreach my $series_id (keys %$serieses) {
        # We set up the user for Search.pm's permission checking - each series
        # runs with the permissions of its creator.
        my $user = Bugzilla::User->new($serieses->{$series_id}->{'creator'});
        my $cgi = Bugzilla::CGI->new($serieses->{$series_id}->{'query'});
        my $data;

        # Do not die if Search->new() detects invalid data, such as an obsolete
        # login name or a renamed product or component, etc.
        eval {
            my $search = Bugzilla::Search->new('params' => $cgi,
                                               'fields' => ["bugs.bug_id"],
                                               'user'   => $user);
            my $sql = $search->getSQL();
            $data = $shadow_dbh->selectall_arrayref($sql);
        };
        next if $@;

        # We need to count the returned rows. Without subselects, we can't
        # do this directly in the SQL for all queries. So we do it by hand.
        my $count = scalar(@$data) || 0;

        $deletesth->execute($series_id);
        $sth->execute($series_id, $count);
    }
}