claws.get.tlds.pl 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #!/usr/bin/perl -w
  2. =pod
  3. =head1
  4. claws.get.tlds.pl - IANA TLDs online list to stdout as gchar* array.
  5. Syntax:
  6. claws.get.tlds.pl [extra-domains.txt] > src/common/tlds.h
  7. Copyright (c) 2015 Ricardo Mones <ricardo@mones.org>
  8. This program is free software: you can redistribute it and/or modify it
  9. under the terms of the GNU General Public License as published by the
  10. Free Software Foundation, either version 3 of the License, or (at your
  11. option) any later version.
  12. This program is distributed in the hope that it will be useful, but
  13. WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. General Public License for more details.
  16. You should have received a copy of the GNU General Public License along
  17. with this program. If not, see <http://www.gnu.org/licenses/>.
  18. =cut
  19. use 5.012;
  20. use utf8;
  21. use LWP::Simple;
  22. use constant {
  23. URL => "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"
  24. };
  25. print "/*\n * This is a generated file.\n * See tools/claws.get.tlds.pl\n */\n";
  26. print "#ifndef __TLDS_H__\n#define __TLDS_H__\n\n";
  27. print "static const gchar *toplvl_domains [] = {\n\t"; # open array
  28. my $payload = get URL;
  29. die "Unable to retrieve IANA list of TLDs\n" unless defined $payload;
  30. my @lines = map { chomp; $_ } split /^/, $payload;
  31. my ($i, $j) = (0, 0);
  32. if (defined $ARGV[0] and -f $ARGV[0]) {
  33. my %domains = ();
  34. foreach (@lines) { $domains{$_} = "" unless (/^#.*$/) }
  35. open my $fh, '<', $ARGV[0] or die "Unable to open $ARGV[0] for reading\n";
  36. while (<$fh>) {
  37. chomp;
  38. push @lines, $_ if (/^#.*/ or not defined $domains{$_});
  39. }
  40. close $fh;
  41. }
  42. foreach (@lines) {
  43. ++$i;
  44. if (/^#(.*)$/) { # comments
  45. my $c = $1; $c =~ s/^\s+|\s+$//g;
  46. print "/* $c */\n\t";
  47. next;
  48. }
  49. next if (/^XN--.*$/); # IDNs not supported yet, see bug #1670
  50. my $tld = lc $_; # list comes in upper case
  51. print "\"$tld\""; ++$j;
  52. print ",\n\t" unless $i >= scalar @lines;
  53. print "\n" if $i >= scalar @lines;
  54. }
  55. print "};\n\n"; # close array
  56. print "#endif\n";