oeis_drafts.pl 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. #!/usr/bin/perl
  2. # Daniel "Trizen" Șuteu
  3. # Date: 07 April 2019
  4. # https://github.com/trizen
  5. # Get the list of OEIS drafts and generate an HTML file, highlighting the sequences that need more terms.
  6. use 5.014;
  7. use strict;
  8. use warnings;
  9. use LWP::UserAgent::Cached;
  10. use HTML::Entities qw(decode_entities encode_entities);
  11. require LWP::UserAgent;
  12. require HTTP::Message;
  # Compile-time switches for this script.
  use constant {
      USE_TOR_PROXY => 0,    # true to use the Tor proxy (127.0.0.1:9050)
  };

  # Directory passed as `cache_dir` to the caching user agent created below.
  my $cache_dir = 'cache';

  # Create the cache directory on first run. Abort with the OS error if that
  # fails instead of carrying on with a cache location that does not exist.
  if (not -d $cache_dir) {
      mkdir($cache_dir)
        or die "Cannot create cache directory '$cache_dir': $!\n";
  }
  # Options common to both user agents: timeout, progress output and a
  # browser-like User-Agent string.
  my $browser_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0';
  my %tls_opts      = (verify_hostname => 1, SSL_version => 'TLSv1_3');
  my @shared_opts   = (
      timeout       => 60,
      show_progress => 1,
      agent         => $browser_agent,
  );

  # Caching user agent: responses are stored under $cache_dir unless the
  # `nocache_if` callback returns true for them.
  my $lwp = LWP::UserAgent::Cached->new(
      @shared_opts,
      cache_dir  => $cache_dir,
      ssl_opts   => {%tls_opts},    # fresh hash per agent, nothing shared
      nocache_if => sub {
          my ($response) = @_;
          my $status = $response->code;

          # Skip caching for server errors (>= 500), for 401 Unauthorized,
          # and for anything that was not a GET request.
          return 1
            if $status >= 500
            || $status == 401
            || $response->request->method ne 'GET';

          return;
      },
  );

  # Plain (uncached) user agent with the same network settings.
  my $lwp_uc = LWP::UserAgent->new(
      @shared_opts,
      ssl_opts => {%tls_opts},
  );
  # Settings applied identically to the cached and the uncached user agent.
  {
      # Encodings that HTTP::Message reports as decodable, requested via the
      # Accept-Encoding header (scalar context, as in the assignment below).
      my $accepted_encodings = HTTP::Message::decodable();

      require LWP::ConnCache;
      my $conn_cache = LWP::ConnCache->new;
      $conn_cache->total_capacity(undef);    # no limit

      # Both agents use the same connection cache object.
      for my $ua ($lwp, $lwp_uc) {
          $ua->default_header('Accept-Encoding' => $accepted_encodings);
          $ua->conn_cache($conn_cache);
      }
  }

  # When enabled, route HTTP and HTTPS requests of both agents through the
  # local SOCKS proxy.
  if (USE_TOR_PROXY) {
      $_->proxy(['http', 'https'], "socks://127.0.0.1:9050") for ($lwp, $lwp_uc);
  }
  # Walk the paginated draft listing with the uncached agent and collect the
  # A-number of every draft linked from it.
  my @all_ids;
  my $start = 0;    # value of the "start" query parameter for the next page
  my %seen_id;      # ids already collected, so a repeated id is kept only once

  while (1) {
      my $page_url = "https://oeis.org/draft?start=$start";
      my $response = $lwp_uc->get($page_url);

      # A failed request must not look like "no more drafts": report it
      # before stopping, so a truncated list is not mistaken for a full one.
      if (not $response->is_success) {
          warn "Failed to fetch $page_url: ", $response->status_line, "\n";
          last;
      }

      # decoded_content() may return undef (e.g. undecodable body).
      my $content = $response->decoded_content // '';

      # In list context the /g match returns every captured id on the page.
      my @ids = grep { !$seen_id{$_}++ }
        $content =~ m{<td><a href="/draft/(A\d+)">A\d+</a>}g;

      # Stop at the first page that contributes no new id.
      @ids || last;

      push @all_ids, @ids;
      $start += 100;    # NOTE(review): assumes 100 drafts per page - confirm
  }

  say "Found: ", scalar(@all_ids), " ids";
  # Open the report file and write the static HTML preamble. Abort with the
  # OS error if the file cannot be opened, since everything below prints to
  # this handle.
  open(my $fh, '>:utf8', 'links.html')
    or die "Cannot open 'links.html' for writing: $!\n";

  # Non-interpolating heredoc: the text is written verbatim.
  print {$fh} <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<style>
tt { font-family: monospace; font-size: 100%; }
p.editing { font-family: monospace; margin: 10px; text-indent: -10px; word-wrap:break-word;}
p { word-wrap: break-word; }
</style>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>OEIS links</title>
</head>
<body bgcolor=#ffffff>
EOF
  #~ say $fh "<ul>";
  # remove_tags($str)
  #
  # Strip markup from an HTML fragment and normalise its whitespace.
  #
  # Parameters:
  #   $str - text possibly containing "<...>" tags; may be undef.
  #
  # Returns:
  #   The text with every "<...>" span removed, leading/trailing whitespace
  #   dropped and each internal run of whitespace collapsed to one space.
  #   An undefined argument yields the empty string.
  sub remove_tags {
      my ($str) = @_;

      # Treat a missing value as empty text instead of warning about undef.
      return '' if not defined $str;

      # Remove everything from "<" up to the next ">"; /s lets a tag span
      # several lines, /r returns the result without modifying $str.
      my $text = $str =~ s/<.*?>//gsr;

      # split(' ', ...) skips leading whitespace and splits on whitespace
      # runs, so re-joining with one space normalises the spacing.
      return join(' ', split(' ', $text));
  }
  # Fetch every draft page through the caching agent and append one entry per
  # draft to the report: "<name> -- <author>" followed by a numbered link.
  # Drafts whose page carries a "more" or "hard" span get a big, bold name.
  my $k = 1;    # 1-based running number shown next to each link

  foreach my $id (@all_ids) {
      my $url      = "https://oeis.org/draft/$id";
      my $response = $lwp->get($url);

      # On a failed request, report it and keep the entry (link only) rather
      # than parsing an error page; decoded_content() may also be undef.
      my $content = '';
      if ($response->is_success) {
          $content = $response->decoded_content // '';
      }
      else {
          warn "Failed to fetch $url: ", $response->status_line, "\n";
      }

      # True when the page contains a titled <span> reading "more" or "hard".
      my $more = ($content =~ m{<span title=".*?">(?:more|hard)</span>}) ? 1 : 0;

      my $author = '';
      my $name   = '';

      # The leading greedy .* anchors on the LAST "NAME" label in the page.
      # NOTE(review): the (?!<tt>\s*<del>) lookahead presumably skips a
      # deleted (old) name - confirm against real draft markup.
      if ($content =~ m{.*<font size=-2>NAME</font>.*?(?!<tt>\s*<del>)<tt>(.*?)</tt>}s) {
          $name = remove_tags($1);
      }

      # Likewise, the first <ins>...</ins> after the last "AUTHOR" label.
      if ($content =~ m{.*<font size=-2>AUTHOR</font>.*?<ins>(.*?)</ins>}s) {
          $author = remove_tags($1);
      }

      my $tname = $more ? "<big><b>$name</b></big>" : $name;

      say {$fh} "<pre>" . $tname . " -- $author</pre>";
      say {$fh} "<ul>";
      say {$fh} "<li> [$k] <a href=$url>$url</a> </li>";
      say {$fh} "</ul>";

      #say $fh "<li>[$k] <a href=$url>$url</a><br> -- $name -- $author</li>";

      ++$k;
  }

  #~ say $fh "</ul>";

  say {$fh} "</body></html>";

  # Buffered write errors (e.g. a full disk) only surface at close time.
  close($fh) or die "Cannot close 'links.html': $!\n";