graph.pl 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/perl
  2. # Copyright (C) 2003 Alex Schroeder <alex@emacswiki.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the
  16. # Free Software Foundation, Inc.
  17. # 59 Temple Place, Suite 330
  18. # Boston, MA 02111-1307 USA
  19. #
  20. #
  21. # Usage: perl graph.pl URL StartPage depth breadth stop-regexp
  22. # All arguments are optional.
  23. #
  24. # Defaults:
  25. # URL http://www.emacswiki.org/cgi-bin/wiki?action=links;exists=1;raw=1
  26. # StartPage none -- all other options only have effect if this one is set!
  27. # Depth 2
  28. # Breadth 4
  29. # Stop-Regexp ^(Category|SiteMap)
  30. #
  31. # The HTML data is cached. From then on the URL parameter has no effect.
  32. # To refresh the cache, delete the 'graph.cache' file.
  33. #
  34. # Breadth selects a number of children to include. These are sorted by
  35. # number of incoming links.
  36. #
  37. # Example usage:
  38. # perl graph.pl -> download cache file and produce a graph.dot for the entire wiki
  39. # perl graph.pl cache AlexSchroeder -> from the cache, start with AlexSchroeder
  40. # springgraph < cache.dot > cache.png
  41. #
  42. $uri = $ARGV[0];
  43. $uri = "http://www.emacswiki.org/cgi-bin/wiki?action=links;exists=1;raw=1" unless $uri;
  44. $start = $ARGV[1];
  45. $depth = $ARGV[2];
  46. $depth = 2 unless $depth;
  47. $breadth = $ARGV[3];
  48. $breadth = 4 unless $breadth;
  49. $stop = $ARGV[4];
  50. $stop = "^(Category|SiteMap)" unless $stop;
  51. if (-f 'graph.cache') {
  52. print "Reusing graph.cache -- delete it if you want a fresh one.\n";
  53. } else {
  54. print "Downloading graph.cache and saving for reuse.\n";
  55. $command = "wget -O graph.cache $uri";
  56. print "Using $command\n";
  57. system(split(/ /, $command)) == 0 or die "Cannot run wget\n";
  58. }
  59. if (not $start) {
  60. open (F,'<graph.cache') or warn "Cannot read graph.cache\n";
  61. print "Reading graph.cache...\n";
  62. undef $/;
  63. $data = <F>;
  64. close (F);
  65. open (F,'>graph.dot') or warn "Cannot write graph.dot\n";
  66. print "Writing graph.dot...\n";
  67. print "Using all pages...\n";
  68. print F "digraph links {\n";
  69. print F $data;
  70. print F "}\n";
  71. close (F);
  72. exit;
  73. }
  74. open(F,'graph.cache') or warn "Cannot read graph.cache\n";
  75. print "Reading graph.cache...\n";
  76. while($_ = <F>) {
  77. if (m/^"(.*?)" -> "(.*?)"$/) {
  78. push (@{$page{$1}}, $2);
  79. $score{$2}++;
  80. }
  81. }
  82. close(F);
  83. open(F,'>graph.dot') or warn "Cannot write graph.dot\n";
  84. print "Writing graph.dot...\n";
  85. print F "digraph links {\n";
  86. print "Starting with $start...\n";
  87. $count = 0;
  88. @pages = ($start);
  89. while ($count++ < $depth) {
  90. @current = @pages;
  91. foreach (@pages) {
  92. $done{$_} = 1;
  93. }
  94. @pages = ();
  95. foreach $page (@current) {
  96. @links = @{$page{$page}};
  97. @links = sort {$score{$a} <=> $score{$b}} @links; # only take pages with highest score
  98. @links = @links[0..$breadth-1] if $#links >= $breadth;
  99. next if $stop and eval "$page =~ /$stop/"; # no children for stop pages
  100. foreach $target (sort @links) {
  101. push(@pages, $target) unless $done{$target}; # don't cycle
  102. print F "\"$page\" -> \"$target\"\n";
  103. }
  104. }
  105. }
  106. print F "}\n";
  107. close(F);
  108. print "Done.\n";