123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- #!/usr/bin/perl
- # Copyright (C) 2005, 2006, 2007, 2012 Alex Schroeder <alex@gnu.org>
- #
- # This program is free software: you can redistribute it and/or modify it under
- # the terms of the GNU General Public License as published by the Free Software
- # Foundation, either version 3 of the License, or (at your option) any later
- # version.
- #
- # This program is distributed in the hope that it will be useful, but WITHOUT
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License along with
- # this program. If not, see <http://www.gnu.org/licenses/>.
- use strict;
- use warnings;
- use CGI ;
- use CGI::Carp qw(fatalsToBrowser);
- use LWP::UserAgent;
- use XML::LibXML;
- use URI;
- my %indexes = (
- 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Command-Index.html'
- => 'GNU Emacs manual, Command and Function Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Variable-Index.html'
- => 'GNU Emacs manual, Variable Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Concept-Index.html'
- => 'GNU Emacs manual, Concept Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/emacs/index.html'
- => 'GNU Emacs manual, Top Menu',
- 'http://www.gnu.org/software/emacs/manual/html_node/elisp/Index.html'
- => 'GNU Emacs Lisp reference manual, Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/elisp/index.html'
- => 'GNU Emacs Lisp reference manual, Top Menu',
- 'http://www.gnu.org/software/emacs/manual/html_node/message/Index.html'
- => 'Message Manual, Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/gnus/Index.html'
- => 'The Gnus Newsreader, Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/cl/Function-Index.html'
- => 'Common Lisp Extensions, Function Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/ccmode/Variable-Index.html'
- => 'CC Mode Manual, Variable Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/ccmode/Concept-and-Key-Index.html'
- => 'CC Mode Manual, Command and Function Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/org/Index.html'
- => 'Org Mode Manual, Index',
- 'http://www.gnu.org/software/auctex/manual/auctex/Function-Index.html'
- => 'AUCTeX Manual, Function Index',
- 'http://www.gnu.org/software/auctex/manual/auctex/Variable-Index.html'
- => 'AUCTeX Manual, Variable Index',
- 'http://www.gnu.org/software/auctex/manual/auctex/Concept-Index.html'
- => 'AUCTeX Manual, Concept Index',
- 'http://www.gnu.org/software/texinfo/manual/texinfo/html_node/index.html'
- => 'Texinfo, Command and Variable Index',
- 'http://www.gnu.org/software/texinfo/manual/texinfo/html_node/General-Index.html'
- => 'Texinfo, General Index',
- 'http://www.gnu.org/software/texinfo/manual/info/html_node/Index.html'
- => 'Info, Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/dired-x/Command-Index.html'
- => 'Dired Extra, Function Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/dired-x/Variable-Index.html'
- => 'Dired Extra, Variable Index',
- 'http://www.gnu.org/software/coreutils/manual/html_node/Concept-index.html'
- => 'Coreutils, Index',
- 'http://www.gnu.org/software/diffutils/manual/html_node/Index.html'
- => 'Diffutils, Index',
- 'http://www.gnu.org/software/findutils/manual/html_node/find_html/Primary-Index.html'
- => 'Findutils, Primary Index',
- 'http://www.gnu.org/software/emacs/manual/html_node/ediff/Index.html'
- => 'Edfiff, Index',
- );
- my $db = '/org/org.emacswiki/htdocs/emacs/info-ref.dat';
- my $nl = "\n";
- my $fs = "\023";
- my $gs = "\024";
- my $rs = "\025";
- my $q = new CGI;
- ProcessRequest();
- sub ProcessRequest {
- if ($q->param('init')) {
- Initialize();
- } elsif ($q->param('find')) {
- Find($q->param('find'));
- } else {
- ShowForm();
- }
- }
- sub ShowForm {
- print $q->header, $q->start_html,
- $q->start_form, "Index entry: ", $q->textfield('find'), $q->submit, $q->end_form,
- $q->p($q->a({-href=>"http://www.emacswiki.org/scripts/info-ref"}, "Source"), $q->br(),
- 'Last DB update: ', TimeToText((stat($db))[9]),
- ' (' . $q->a({-href=>$q->url . '?init=1'}, "update") . ')'),
- $q->end_html;
- }
- sub Find {
- my $str = shift;
- my %map = ();
- my $data = ReadFileOrDie($db);
- foreach my $line (split(/$nl/, $data)) {
- my ($key, $rest) = split(/$fs/, $line);
- $map{$key} = ();
- if ($rest) {
- foreach my $a (split(/$gs/, $rest)) {
- my ($link, $label) = split(/$rs/, $a);
- $map{$key}{$link} = $label;
- }
- }
- }
- my @links = keys %{$map{$str}};
- if ($#links < 0) {
- ReportError("No matches found for '$str'", '404 Not Found');
- } elsif ($#links == 0) {
- print $q->redirect($links[0]);
- } else {
- my @list = map { $q->a({-href=>$_}, $map{$str}{$_}) } @links;
- print $q->header, $q->h1($str), $q->ol($q->li(\@list));
- }
- }
- sub Initialize {
- my %map = ();
- print $q->header, $q->start_html;
- foreach my $url (keys %indexes) {
- print $q->p($url);;
- # determine base URI
- my $base = URI->new($url);
- # fetch and parse data
- my $data = GetRaw($url);
- # some markup fixes for the elisp manual
- # $data =~ s/&([<"])/&$1/g;
- # $data =~ s/<([<"])/<$1/g;
- # $data =~ s/="fn_"">/="fn_"">/;
- # $data =~ s/<!DOCTYPE.*?>//;
- # $data =~ s'</?font.*?>''gi;
- # $data =~ s'</table><br></P>'</table><br>';
- my $parser = XML::LibXML->new();
- my $doc;
- eval { $doc = $parser->parse_html_string($data); };
- print $q->p($@) if $@;
- next if $@;
- my @nodelist = $doc->findnodes('/html/body/ul/li/a');
- foreach my $node (@nodelist) {
- my $key = $node->textContent;
- my $href = $node->getAttribute('href');
- my $link = URI->new_abs($href, $base);
- # print "$key -> $label $l\n";
- $map{$key} = () unless $map{$key};
- $map{$key}{$link->canonical} = $indexes{$url};
- }
- # elisp manual
- # @nodelist = $doc->findnodes('descendant::table[position()=3]/descendant::tr');
- # foreach my $node (@nodelist) {
- # my ($item, $section) = $node->findnodes('td/a');
- # next unless $item and $section;
- # my $key = $item->textContent;
- # my $label = $section->textContent;
- # my $link = $item->getAttribute('href');
- # my $l = URI->new_abs($link, $base);
- # # print "$key -> $label $l\n";
- # $map{$key} = () unless $map{$key};
- # $map{$key}{$l->canonical} = $label;
- # }
- }
- my $data = join($nl, map {
- my $key = $_;
- $key . $fs . join($gs, map {
- my $link = $_;
- join($rs, $link, $map{$key}{$link});
- } keys %{$map{$_}})
- } keys %map);
- WriteStringToFile($db, $data);
- print $q->p('Database initialized'), $q->end_html;
- }
- sub GetRaw {
- my $uri = shift;
- return unless eval { require LWP::UserAgent; };
- my $ua = LWP::UserAgent->new;
- my $response = $ua->get($uri);
- return $response->decoded_content;
- }
- sub ReadFile {
- my ($filename) = @_;
- my ($data);
- local $/ = undef; # Read complete files
- if (open(IN, "<$filename")) {
- $data=<IN>;
- close IN;
- return (1, $data);
- }
- return (0, '');
- }
- sub ReadFileOrDie {
- my ($filename) = @_;
- my ($status, $data);
- ($status, $data) = ReadFile($filename);
- if (!$status) {
- ReportError("Cannot open $filename: $!", '500 Internal Server Error');
- }
- return $data;
- }
- sub WriteStringToFile {
- my ($file, $string) = @_;
- open(OUT, ">$file")
- or ReportError("Cannot write $file: $!", '500 Internal Server Error');
- print OUT $string;
- close(OUT);
- }
- sub ReportError { # fatal!
- my ($errmsg, $status, $log) = @_;
- print $q->header(-status => $status);
- print $q->start_html, $q->h2($errmsg), $q->end_html;
- exit (1);
- }
- sub CalcDay {
- my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift);
- return sprintf('%4d-%02d-%02d', $year+1900, $mon+1, $mday);
- }
- sub CalcTime {
- my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift);
- return sprintf('%02d:%02d UTC', $hour, $min);
- }
- sub TimeToText {
- my $t = shift;
- return CalcDay($t) . ' ' . CalcTime($t);
- }
|