info-ref 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. #!/usr/bin/perl
  2. # Copyright (C) 2005, 2006, 2007, 2012 Alex Schroeder <alex@gnu.org>
  3. #
  4. # This program is free software: you can redistribute it and/or modify it under
  5. # the terms of the GNU General Public License as published by the Free Software
  6. # Foundation, either version 3 of the License, or (at your option) any later
  7. # version.
  8. #
  9. # This program is distributed in the hope that it will be useful, but WITHOUT
  10. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along with
  14. # this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use warnings;
  17. use CGI ;
  18. use CGI::Carp qw(fatalsToBrowser);
  19. use LWP::UserAgent;
  20. use XML::LibXML;
  21. use URI;
  22. my %indexes = (
  23. 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Command-Index.html'
  24. => 'GNU Emacs manual, Command and Function Index',
  25. 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Variable-Index.html'
  26. => 'GNU Emacs manual, Variable Index',
  27. 'http://www.gnu.org/software/emacs/manual/html_node/emacs/Concept-Index.html'
  28. => 'GNU Emacs manual, Concept Index',
  29. 'http://www.gnu.org/software/emacs/manual/html_node/emacs/index.html'
  30. => 'GNU Emacs manual, Top Menu',
  31. 'http://www.gnu.org/software/emacs/manual/html_node/elisp/Index.html'
  32. => 'GNU Emacs Lisp reference manual, Index',
  33. 'http://www.gnu.org/software/emacs/manual/html_node/elisp/index.html'
  34. => 'GNU Emacs Lisp reference manual, Top Menu',
  35. 'http://www.gnu.org/software/emacs/manual/html_node/message/Index.html'
  36. => 'Message Manual, Index',
  37. 'http://www.gnu.org/software/emacs/manual/html_node/gnus/Index.html'
  38. => 'The Gnus Newsreader, Index',
  39. 'http://www.gnu.org/software/emacs/manual/html_node/cl/Function-Index.html'
  40. => 'Common Lisp Extensions, Function Index',
  41. 'http://www.gnu.org/software/emacs/manual/html_node/ccmode/Variable-Index.html'
  42. => 'CC Mode Manual, Variable Index',
  43. 'http://www.gnu.org/software/emacs/manual/html_node/ccmode/Concept-and-Key-Index.html'
  44. => 'CC Mode Manual, Command and Function Index',
  45. 'http://www.gnu.org/software/emacs/manual/html_node/org/Index.html'
  46. => 'Org Mode Manual, Index',
  47. 'http://www.gnu.org/software/auctex/manual/auctex/Function-Index.html'
  48. => 'AUCTeX Manual, Function Index',
  49. 'http://www.gnu.org/software/auctex/manual/auctex/Variable-Index.html'
  50. => 'AUCTeX Manual, Variable Index',
  51. 'http://www.gnu.org/software/auctex/manual/auctex/Concept-Index.html'
  52. => 'AUCTeX Manual, Concept Index',
  53. 'http://www.gnu.org/software/texinfo/manual/texinfo/html_node/index.html'
  54. => 'Texinfo, Command and Variable Index',
  55. 'http://www.gnu.org/software/texinfo/manual/texinfo/html_node/General-Index.html'
  56. => 'Texinfo, General Index',
  57. 'http://www.gnu.org/software/texinfo/manual/info/html_node/Index.html'
  58. => 'Info, Index',
  59. 'http://www.gnu.org/software/emacs/manual/html_node/dired-x/Command-Index.html'
  60. => 'Dired Extra, Function Index',
  61. 'http://www.gnu.org/software/emacs/manual/html_node/dired-x/Variable-Index.html'
  62. => 'Dired Extra, Variable Index',
  63. 'http://www.gnu.org/software/coreutils/manual/html_node/Concept-index.html'
  64. => 'Coreutils, Index',
  65. 'http://www.gnu.org/software/diffutils/manual/html_node/Index.html'
  66. => 'Diffutils, Index',
  67. 'http://www.gnu.org/software/findutils/manual/html_node/find_html/Primary-Index.html'
  68. => 'Findutils, Primary Index',
  69. 'http://www.gnu.org/software/emacs/manual/html_node/ediff/Index.html'
  70. => 'Edfiff, Index',
  71. );
  72. my $db = '/org/org.emacswiki/htdocs/emacs/info-ref.dat';
  73. my $nl = "\n";
  74. my $fs = "\023";
  75. my $gs = "\024";
  76. my $rs = "\025";
  77. my $q = new CGI;
  78. ProcessRequest();
  79. sub ProcessRequest {
  80. if ($q->param('init')) {
  81. Initialize();
  82. } elsif ($q->param('find')) {
  83. Find($q->param('find'));
  84. } else {
  85. ShowForm();
  86. }
  87. }
  88. sub ShowForm {
  89. print $q->header, $q->start_html,
  90. $q->start_form, "Index entry: ", $q->textfield('find'), $q->submit, $q->end_form,
  91. $q->p($q->a({-href=>"http://www.emacswiki.org/scripts/info-ref"}, "Source"), $q->br(),
  92. 'Last DB update: ', TimeToText((stat($db))[9]),
  93. ' (' . $q->a({-href=>$q->url . '?init=1'}, "update") . ')'),
  94. $q->end_html;
  95. }
  96. sub Find {
  97. my $str = shift;
  98. my %map = ();
  99. my $data = ReadFileOrDie($db);
  100. foreach my $line (split(/$nl/, $data)) {
  101. my ($key, $rest) = split(/$fs/, $line);
  102. $map{$key} = ();
  103. if ($rest) {
  104. foreach my $a (split(/$gs/, $rest)) {
  105. my ($link, $label) = split(/$rs/, $a);
  106. $map{$key}{$link} = $label;
  107. }
  108. }
  109. }
  110. my @links = keys %{$map{$str}};
  111. if ($#links < 0) {
  112. ReportError("No matches found for '$str'", '404 Not Found');
  113. } elsif ($#links == 0) {
  114. print $q->redirect($links[0]);
  115. } else {
  116. my @list = map { $q->a({-href=>$_}, $map{$str}{$_}) } @links;
  117. print $q->header, $q->h1($str), $q->ol($q->li(\@list));
  118. }
  119. }
  120. sub Initialize {
  121. my %map = ();
  122. print $q->header, $q->start_html;
  123. foreach my $url (keys %indexes) {
  124. print $q->p($url);;
  125. # determine base URI
  126. my $base = URI->new($url);
  127. # fetch and parse data
  128. my $data = GetRaw($url);
  129. # some markup fixes for the elisp manual
  130. # $data =~ s/&([<"])/&amp;$1/g;
  131. # $data =~ s/<([<"])/&lt;$1/g;
  132. # $data =~ s/="fn_"">/="fn_&quot;">/;
  133. # $data =~ s/<!DOCTYPE.*?>//;
  134. # $data =~ s'</?font.*?>''gi;
  135. # $data =~ s'</table><br></P>'</table><br>';
  136. my $parser = XML::LibXML->new();
  137. my $doc;
  138. eval { $doc = $parser->parse_html_string($data); };
  139. print $q->p($@) if $@;
  140. next if $@;
  141. my @nodelist = $doc->findnodes('/html/body/ul/li/a');
  142. foreach my $node (@nodelist) {
  143. my $key = $node->textContent;
  144. my $href = $node->getAttribute('href');
  145. my $link = URI->new_abs($href, $base);
  146. # print "$key -> $label $l\n";
  147. $map{$key} = () unless $map{$key};
  148. $map{$key}{$link->canonical} = $indexes{$url};
  149. }
  150. # elisp manual
  151. # @nodelist = $doc->findnodes('descendant::table[position()=3]/descendant::tr');
  152. # foreach my $node (@nodelist) {
  153. # my ($item, $section) = $node->findnodes('td/a');
  154. # next unless $item and $section;
  155. # my $key = $item->textContent;
  156. # my $label = $section->textContent;
  157. # my $link = $item->getAttribute('href');
  158. # my $l = URI->new_abs($link, $base);
  159. # # print "$key -> $label $l\n";
  160. # $map{$key} = () unless $map{$key};
  161. # $map{$key}{$l->canonical} = $label;
  162. # }
  163. }
  164. my $data = join($nl, map {
  165. my $key = $_;
  166. $key . $fs . join($gs, map {
  167. my $link = $_;
  168. join($rs, $link, $map{$key}{$link});
  169. } keys %{$map{$_}})
  170. } keys %map);
  171. WriteStringToFile($db, $data);
  172. print $q->p('Database initialized'), $q->end_html;
  173. }
  174. sub GetRaw {
  175. my $uri = shift;
  176. return unless eval { require LWP::UserAgent; };
  177. my $ua = LWP::UserAgent->new;
  178. my $response = $ua->get($uri);
  179. return $response->decoded_content;
  180. }
  181. sub ReadFile {
  182. my ($filename) = @_;
  183. my ($data);
  184. local $/ = undef; # Read complete files
  185. if (open(IN, "<$filename")) {
  186. $data=<IN>;
  187. close IN;
  188. return (1, $data);
  189. }
  190. return (0, '');
  191. }
  192. sub ReadFileOrDie {
  193. my ($filename) = @_;
  194. my ($status, $data);
  195. ($status, $data) = ReadFile($filename);
  196. if (!$status) {
  197. ReportError("Cannot open $filename: $!", '500 Internal Server Error');
  198. }
  199. return $data;
  200. }
  201. sub WriteStringToFile {
  202. my ($file, $string) = @_;
  203. open(OUT, ">$file")
  204. or ReportError("Cannot write $file: $!", '500 Internal Server Error');
  205. print OUT $string;
  206. close(OUT);
  207. }
  208. sub ReportError { # fatal!
  209. my ($errmsg, $status, $log) = @_;
  210. print $q->header(-status => $status);
  211. print $q->start_html, $q->h2($errmsg), $q->end_html;
  212. exit (1);
  213. }
  214. sub CalcDay {
  215. my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift);
  216. return sprintf('%4d-%02d-%02d', $year+1900, $mon+1, $mday);
  217. }
  218. sub CalcTime {
  219. my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(shift);
  220. return sprintf('%02d:%02d UTC', $hour, $min);
  221. }
  222. sub TimeToText {
  223. my $t = shift;
  224. return CalcDay($t) . ' ' . CalcTime($t);
  225. }