textviewer.pl 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/perl
  2. use strict;
  3. use warnings;
  # usage (STATUS [, MESSAGE])
  #
  # Print the command synopsis, optionally followed by MESSAGE on a line of
  # its own, then terminate the program with exit code STATUS.
  # A true STATUS selects STDERR as the output handle; a false STATUS
  # leaves the currently selected handle in place.
  sub usage ($;$)
  {
      my $status  = shift;
      my $message = @_ ? shift : "";

      select STDERR if $status;

      print "usage: $0 [--html] [--type=<type>] file\n",
            " --html Generate HTML (if supported)\n",
            " --type=X X as mimetype (msword => doc)\n";
      print "$message\n" if $message;

      exit $status;
  } # usage
  # A lone "-?" or "-help"/"--help" is answered before option parsing:
  # neither is a declared option below ("h" belongs to --html), so
  # GetOptions would reject them and exit with an error status instead.
  @ARGV == 1 and $ARGV[0] eq "-?" || $ARGV[0] =~ m/^-+help$/ and usage (0);

  use Getopt::Long qw(:config bundling nopermute);
  my $opt_v = 0;          # Verbosity; a bare -v sets it to 1
  my $opt_t;              # Type given with -t / --type / --mimetype
  my $opt_h = "text";     # Output flavour; --html switches it to "html"
  GetOptions (
      "v|verbose:1"       => \$opt_v,
      "t|type|mimetype=s" => \$opt_t,
      "h|html"            => sub { $opt_h = "html" },
      ) or usage (1);
  $opt_v and print STDERR "$0 @ARGV\n";

  # Check for presence rather than truth, so a file that is literally
  # named "0" is not mistaken for a missing argument.
  my $file = shift;
  defined ($file) && length ($file)
          or usage (1, "File argument is missing");
  -f $file or usage (1, "File argument is not a plain file");
  -r $file or usage (1, "File argument is not a readable file");
  -s $file or usage (1, "File argument is an empty file");
  # Viewer table: $fh{<flavour>}{<type>}, flavour being "text" or "html".
  #
  # A value is either
  #  - an array ref listing the candidate commands for that type, in
  #    order of preference (a code ref entry is an internal handler), or
  #  - a plain string naming another type of the same flavour (an alias).
  #
  # Inside a command "%f" is replaced with the file name; a command
  # without "%f" gets the file name appended as its last argument.
  my %fh = (
      text => {
          # --- handlers ---
          bin   => [ "strings" ],                   # Fallback for binary files
          txt   => [ "cat" ],                       # Plain text
          html  => [ "txt2htm", "text2html" ],      # HTML
          doc   => [ "antiword -w 72" ],            # Microsoft Word
          xls   => [ "xlscat -L" ],                 # Microsoft Excel
          rtf   => [ "rtf2text", "unrtf -t text" ], # RTF
          pdf   => [ "pdftotext %f -" ],            # Adobe PDF
          odt   => [ "ooo2txt" ],                   # OpenOffice writer
          pl    => [ "perltidy -st -se", "cat" ],   # Perl
          test  => [ \&test ],                      # Internal
          # Not enabled: Microsoft PowerPoint
          #   ppt => [ "ppthtml" ],
          #   ppthtml "$1" | html2text

          # --- aliases ---
          msword         => "doc",
          "vnd.ms-excel" => "xls",
          "ms-excel"     => "xls",
          sxc            => "xls",                  # OpenOffice spreadsheet
          pm             => "pl",

          # Everything below is shown as plain text
          ( map { ($_ => "txt") } qw(
              diff
              c h ic ec cc
              sh sed awk
              plain
              ) ),
          },
      html => {
          rtf   => [ "rtf2html" ],
          },
      );
  # Work out which entry of %fh applies to $file.

  # First guess: the lower-cased file name extension (empty if none).
  my $ext = $file =~ m/\.(\w+)$/ ? lc $1 : "";

  # A --type that names a known text handler overrides the extension.
  $opt_t && exists $fh{text}{lc $opt_t} and $ext = lc $opt_t;

  unless (exists $fh{text}{$ext}) {
      # Unknown extension: classify the content with file(1).
      # The list form of the pipe open hands the file name to file(1) as
      # a single argument without going through a shell, so names with
      # spaces, quotes or other shell metacharacters are safe. The "--"
      # keeps a name with a leading dash from being read as an option.
      my $ftype = "";
      if (open my $ph, "-|", "file", "--brief", "--", $file) {
          local $/;       # Slurp whatever file(1) printed
          $ftype = <$ph>;
          defined $ftype or $ftype = "";
          close $ph;
          }
      $ext =
          $ftype =~ m/^pdf doc/i                                     ? "pdf" :
          $ftype =~ m/^ascii( english)? text/i                       ? "txt" :
          $ftype =~ m/^(utf-8 unicode|iso-\d+)( english)? text/i     ? "txt" :
          $ftype =~ m/^xml doc/i                                     ? "xml" :
          $ftype =~ m/^\w+ compress/i                                ? "bin" :
                                                                       "bin" ;
      # \w+ archive
      # \w+ image
      # ...
      }
  $ext ||= "txt";

  # Fall back step by step: no handler in the requested flavour => use
  # the text flavour; still no handler for this type => treat as "txt".
  exists $fh{$opt_h}{$ext} or $opt_h = "text";
  exists $fh{$opt_h}{$ext} or $ext   = "txt";

  # A plain string entry is an alias for another type: follow it once.
  my $ref = $fh{$opt_h}{$ext};
  ref $ref or $ref = $fh{$opt_h}{$ref};
  $opt_v and print STDERR "[ @$ref ] $file\n";
  # which (COMMAND)
  #
  # Look up the program named by the first word of COMMAND (anything from
  # the first whitespace on is ignored) in the directories of $ENV{PATH}.
  # Returns the full path of the first match that is a plain, executable
  # file, or 0 when there is none.
  sub which ($)
  {
      (my $cmd = shift) =~ s/\s.*//;  # Only the command. Discard arguments here

      # With an empty name "$dir/" would be the PATH directory itself
      length $cmd          or return 0;
      defined $ENV{PATH}   or return 0;

      foreach my $dir (split m/:+/, $ENV{PATH}) {
          my $candidate = "$dir/$cmd";
          # -x alone is also true for searchable directories, so insist
          # on a plain file first
          -f $candidate && -x _ and return $candidate;
          }
      return 0;
  } # which
  # Pick the first candidate that is installed. If none of them can be
  # found in PATH, the file is shown with "cat -ve".
  my $cmd = "cat -ve";
  foreach my $c (@$ref) {
      if (ref $c) {
          # Internal handler: call it with the file name and finish
          $c->($file);
          exit;
          }
      which ($c) or next;
      $cmd = $c;
      last;
      }

  # The command line is run as a single string, so the file name has to
  # be protected before it is pasted in:
  #  - a leading "-" would be taken for an option; "./" prevents that
  #  - single-quoting (with embedded quotes rewritten as '\'') stops the
  #    shell from splitting the name or interpreting metacharacters in it
  my $arg = $file =~ m/^-/ ? "./$file" : $file;
  $arg =~ s/'/'\\''/g;
  $arg = "'$arg'";

  # Put the file where "%f" is, or at the end when there is no "%f"
  $cmd =~ s/%f\b/$arg/g or $cmd .= " $arg";
  $opt_v and print STDERR "$cmd\n";

  # exec only returns on failure
  exec $cmd or die "$0: cannot exec '$cmd': $!\n";