textviewer.pl 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/perl
  2. # COPYRIGHT AND LICENSE
  3. # Copyright (C) 2005-2018 H.Merijn Brand
  4. #
  5. # This script is free software; you can redistribute it and/or modify it
  6. # under the same terms as Perl and/or Claws Mail itself. (GPL)
  7. use 5.14.1;
  8. use warnings;
  # Version string reported by --version.
  our $VERSION = "1.01 - 2018-10-08";
  # Name this script was invoked as, with any leading directory part removed.
  # Used as the program name in the usage and version messages.
  (our $CMD = $0) =~ s{.*/}{};
  # Print the usage text, optionally followed by an extra message, and exit.
  #   $status  - exit status; any true value redirects the output to STDERR
  #   $message - optional extra line, printed after the usage text
  # This function never returns.
  sub usage {
      my ($status, $message) = (@_, "");

      # Errors go to STDERR, requested help goes to STDOUT
      select STDERR if $status;

      say join "\n",
          "usage: $CMD [--html] [--type=<type>] file",
          " --html Generate HTML (if supported)",
          " --type=X X as mimetype (msword => doc)",
          " $CMD --list will show all implemented conversions";
      say $message if $message;

      exit $status;
  } # usage
  21. use Getopt::Long qw(:config bundling nopermute);
  # Command line options
  #   $opt_v  verbosity level, 0 by default (-v without a value means 1)
  #   $opt_h  output flavour: "text" unless --html is given
  #   $opt_t  file type given with --type
  #   $opt_l  true when --list is given
  my $opt_h = "text";
  my $opt_v = 0;
  my ($opt_t, $opt_l);

  my @option_spec = (
      "help|?"            => sub { usage (0) },
      "V|version"         => sub { say "$CMD [$VERSION]"; exit 0 },
      "v|verbose:1"       => \$opt_v,
      "t|type|mimetype=s" => \$opt_t,
      "h|html"            => sub { $opt_h = "html" },
      "l|list!"           => \$opt_l,
  );
  GetOptions (@option_spec) or usage (1);

  # In verbose mode show what is left on the command line after option parsing
  say "$0 @ARGV" if $opt_v;
  # Conversion table, indexed by output flavour ("text" or "html") and then by
  # file type (file extension or mimetype subtype).
  #  - an array reference lists the candidate commands that can show the
  #    content; a code reference in that list is an internal handler
  #  - a plain string is an alias: it names another entry of the same flavour
  # %f will be replaced with file. If no %f, file will be the last arg
  # Every key may occur only once per flavour: in a hash constructor a repeated
  # key silently replaces the earlier entry.
  my %fh = (
      text => {
          bin            => [ "strings" ],                  # fallback for binary files
          txt            => [ "cat" ],                      # Plain text
          html           => [ "htm2txt",
                              "html2text" ],                # HTML
          msword         => "doc",
          doc            => [ "catdoc -x -dutf-8",
                              "wvText",
                              "antiword -w 72" ],           # MS Word
          "vnd.ms-excel" => "xls",
          "ms-excel"     => "xls",
          docx           => [ "unoconv -f text --stdout" ], # MS Word
          xlsx           => "xls",
          xls            => [ "xlscat -L",
                              "catdoc -x -dutf-8",
                              "wvText" ],                   # MS Excel
          # ppt          => [ "ppthtml" ],                  # MS PowerPoint
          #                 ppthtml "$1" | html2text
          csv            => "xls",                          # Comma Separated Values
          ics            => [ "ics2txt" ],                  # ICS calendar request
          rtf            => [ "rtf2text",
                              "unrtf -t text" ],            # RTF
          pdf            => [ "pdftotext %f -" ],           # Adobe PDF
          ods            => "xls",                          # OpenOffice spreadsheet
          sxc            => "xls",                          # OpenOffice spreadsheet
          odt            => [ "oo2pod %f | pod2text",
                              "ooo2txt" ],                  # OpenOffice writer
          pl             => [ "perltidy -st -se",
                              "cat" ],                      # Perl
          pm             => "pl",
          jsn            => [ "json_pp" ],                  # JSON
          json           => "jsn",
          xml            => [ "xml_pp" ],                   # XML

          # All of these are shown as plain text
          ( map { $_ => "txt" } qw(
              patch diff
              c h ic ec cc
              sh sed awk
              plain
              yml yaml
              )),

          bz2            => [ "bzip2 -d < %f | strings" ],
          zip            => [ "unzip -l %f" ],              # ZIP
          test           => [ \&test ],                     # Internal
          tar            => [ "tar tvf" ],                  # Tar uncompressed
          tgz            => [ "tar tzvf" ],                 # Tar GZ compressed
          tbz            => [ "tar tjvf" ],                 # Tar BZip2 compressed
          txz            => [ "tar tJvf" ],                 # Tar XZ compressed
          rar            => [ "unrar l" ],                  # RAR
          },

      html => {
          rtf            => [ "rtf2html" ],
          },
      );
  # --list: print one line per known file type and candidate command of the
  # text table, sorted by type, then stop.
  if ($opt_l) {
      my $table = $fh{text};
      for my $type (sort keys %$table) {
          my $entry = $table->{$type};
          # A plain string is an alias: show the commands of the entry it names
          my $commands = ref $entry ? $entry : $table->{$entry};
          foreach my $command (@$commands) {
              printf " .%-12s %s\n", $type, $command;
          }
      }
      exit 0;
  }
  # The first remaining argument is the file to show. It has to be a readable,
  # non-empty plain file; otherwise usage () reports the problem and exits.
  my $file = shift @ARGV;
  # Test for defined and length instead of truth, so that a file that is
  # literally named "0" is not reported as missing
  defined $file && length $file
           or usage (1, "File argument is missing");
  -f $file or usage (1, "File argument is not a plain file");
  -r $file or usage (1, "File argument is not a readable file");
  -s $file or usage (1, "File argument is an empty file");
  # Determine the file type key ($ext) that selects the conversion commands:
  #  1. the lower-cased extension of the file name
  #  2. overruled by --type when that type is present in the text table
  #  3. when still unknown, guessed from the output of file(1)
  my $ext = $file =~ m/\.(\w+)$/ ? lc $1 : "";
  if ($opt_t) {
      my $type = lc $opt_t;
      exists $fh{text}{$type} and $ext = $type;
  }
  unless (exists $fh{text}{$ext}) {
      # List-form pipe open: file(1) is started without a shell, so spaces or
      # shell meta characters in the file name cannot alter the command.
      # The "--" keeps a name that starts with "-" from being read as option.
      my $ftype = "";
      if (open my $pipe, "-|", "file", "--brief", "--", $file) {
          local $/;                   # read all output at once
          $ftype = <$pipe> // "";
          close $pipe;
      }
      # When file(1) cannot be run, $ftype stays empty and "bin" is used
      $ext =
          $ftype =~ m/^pdf doc/i                                  ? "pdf" :
          $ftype =~ m/^ascii( english)? text/i                    ? "txt" :
          $ftype =~ m/^(utf-8 unicode|iso-\d+)( english)? text/i  ? "txt" :
          $ftype =~ m/^xml doc/i                                  ? "xml" :
          $ftype =~ m/^\w+ compress/i                             ? "bin" :
                                                                    "bin" ;
      # Not recognized yet:
      # \w+ archive
      # \w+ image
      # ...
  }
  $ext ||= "txt";

  # Fall back to the text table when the requested flavour has no entry for
  # this type, and to plain text when the type is not known at all
  exists $fh{$opt_h}{$ext} or $opt_h = "text";
  exists $fh{$opt_h}{$ext} or $ext   = "txt";

  # $ref will be the list of candidate commands; a plain string is an alias
  # for another entry of the same table
  my $ref = $fh{$opt_h}{$ext};
  ref $ref or $ref = $fh{$opt_h}{$ref};
  $opt_v and warn "[ @$ref ] $file\n";
  # Locate a command in the directories of $ENV{PATH}.
  #   argument - a command line; only its first word (the program name) is
  #              looked up, the arguments are discarded
  #   returns  - the full path of the first executable plain file found,
  #              or 0 when there is none
  sub which {
      my ($spec) = @_;
      (my $cmd = $spec // "") =~ s/\s.*//; # Only the command. Discard arguments here
      length $cmd or return 0;
      foreach my $path (split m/:+/, $ENV{PATH} // "") {
          my $candidate = "$path/$cmd";
          # -x alone is also true for directories: require a plain file
          -f $candidate && -x _ and return $candidate;
      }
      return 0;
  } # which
  # Pick the first candidate command that is installed and replace this
  # process with it, showing the file.
  my $cmd = "cat -ve";            # last resort when no candidate is installed
  foreach my $c (@$ref) {
      if (ref $c) {               # internal handler: call it and be done
          $c->($file);
          exit;
      }
      # For a pipeline only the first program is checked
      which ($c) or next;
      $cmd = $c;
      last;
  }

  my @cmd;
  if ($cmd =~ m/[|<>]/) {
      # Pipelines and redirections only work when run by a shell. The file
      # name is handed over as positional parameter $1 and is never made part
      # of the script text, so the shell will not interpret its content.
      (my $script = $cmd) =~ s/%f\b/"\$1"/g or $cmd .= " %f";
      $script =~ m/"\$1"/ or $script .= ' "$1"';
      @cmd = ("sh", "-c", $script, "sh", $file);
  }
  else {
      # Simple command: run it without a shell, one argument per word
      @cmd = split m/ +/ => $cmd;
      my $placed = 0;
      s/%f\b/$file/ and $placed++ for @cmd;
      $placed or push @cmd, $file;
  }
  #$cmd =~ s/%f\b/$file/g or $cmd .= " $file";
  $opt_v and say "@cmd";
  # exec only returns on failure
  exec { $cmd[0] } @cmd or die "Cannot execute '$cmd[0]': $!\n";