graph.pl 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/perl
  2. # Copyright (C) 2003 Alex Schroeder <alex@emacswiki.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. # Usage: perl graph.pl URL StartPage depth breadth stop-regexp
  17. # All arguments are optional.
  18. #
  19. # Defaults:
  20. # URL http://www.emacswiki.org/cgi-bin/wiki?action=links;exists=1;raw=1
  21. # StartPage none -- all other options only have effect if this one is set!
  22. # Depth 2
  23. # Breadth 4
  24. # Stop-Regexp ^(Category|SiteMap)
  25. #
  26. # The HTML data is cached. From then on the URL parameter has no effect.
  27. # To refresh the cache, delete the 'graph.cache' file.
  28. #
  29. # Breadth selects a number of children to include. These are sorted by
  30. # number of incoming links.
  31. #
  32. # Example usage:
  33. # perl graph.pl -> download cache file and produce a graph.dot for the entire wiki
  34. # perl graph.pl cache AlexSchroeder -> from the cache, start with AlexSchroeder
  35. # springgraph < cache.dot > cache.png
  36. #
  37. $uri = $ARGV[0];
  38. $uri = "http://www.emacswiki.org/cgi-bin/wiki?action=links;exists=1;raw=1" unless $uri;
  39. $start = $ARGV[1];
  40. $depth = $ARGV[2];
  41. $depth = 2 unless $depth;
  42. $breadth = $ARGV[3];
  43. $breadth = 4 unless $breadth;
  44. $stop = $ARGV[4];
  45. $stop = "^(Category|SiteMap)" unless $stop;
  46. if (-f 'graph.cache') {
  47. print "Reusing graph.cache -- delete it if you want a fresh one.\n";
  48. } else {
  49. print "Downloading graph.cache and saving for reuse.\n";
  50. $command = "wget -O graph.cache $uri";
  51. print "Using $command\n";
  52. system(split(/ /, $command)) == 0 or die "Cannot run wget\n";
  53. }
  54. if (not $start) {
  55. open (F,'<graph.cache') or warn "Cannot read graph.cache\n";
  56. print "Reading graph.cache...\n";
  57. undef $/;
  58. $data = <F>;
  59. close (F);
  60. open (F,'>graph.dot') or warn "Cannot write graph.dot\n";
  61. print "Writing graph.dot...\n";
  62. print "Using all pages...\n";
  63. print F "digraph links {\n";
  64. print F $data;
  65. print F "}\n";
  66. close (F);
  67. exit;
  68. }
  69. open(F,'graph.cache') or warn "Cannot read graph.cache\n";
  70. print "Reading graph.cache...\n";
  71. while($_ = <F>) {
  72. if (m/^"(.*?)" -> "(.*?)"$/) {
  73. push (@{$page{$1}}, $2);
  74. $score{$2}++;
  75. }
  76. }
  77. close(F);
  78. open(F,'>graph.dot') or warn "Cannot write graph.dot\n";
  79. print "Writing graph.dot...\n";
  80. print F "digraph links {\n";
  81. print "Starting with $start...\n";
  82. $count = 0;
  83. @pages = ($start);
  84. while ($count++ < $depth) {
  85. @current = @pages;
  86. foreach (@pages) {
  87. $done{$_} = 1;
  88. }
  89. @pages = ();
  90. foreach $page (@current) {
  91. @links = @{$page{$page}};
  92. @links = sort {$score{$a} <=> $score{$b}} @links; # only take pages with highest score
  93. @links = @links[0..$breadth-1] if $#links >= $breadth;
  94. next if $stop and eval "$page =~ /$stop/"; # no children for stop pages
  95. foreach $target (sort @links) {
  96. push(@pages, $target) unless $done{$target}; # don't cycle
  97. print F "\"$page\" -> \"$target\"\n";
  98. }
  99. }
  100. }
  101. print F "}\n";
  102. close(F);
  103. print "Done.\n";