123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
#!/usr/bin/perl

# COPYRIGHT AND LICENSE
# Copyright (C) 2005-2018 H.Merijn Brand
#
# This script is free software; you can redistribute it and/or modify it
# under the same terms as Perl and/or Claws Mail itself. (GPL)
use 5.14.1;
use warnings;

# Version string reported by --version.
our $VERSION = "1.01 - 2018-10-08";

# Name this script was invoked as, with any leading directory part removed.
(our $CMD = $0) =~ s{.*/}{};
# usage ($err [, $str])
#
# Print the usage summary, followed by $str when it is given and true,
# then exit with status $err.  A true $err redirects all of that output
# to STDERR.
sub usage {
    my ($err, $str) = (@_, "");

    select STDERR if $err;

    my @help = (
        "usage: $CMD [--html] [--type=<type>] file",
        " --html Generate HTML (if supported)",
        " --type=X X as mimetype (msword => doc)",
        " $CMD --list will show all implemented conversions",
    );
    say join "\n", @help;
    say $str if $str;

    exit $err;
} # usage
use Getopt::Long qw(:config bundling nopermute);

# Option state shared with the rest of the script.
my $opt_v = 0;          # verbosity level (-v without a value means 1)
my $opt_h = "text";     # conversion table to use: "text" or "html"
my $opt_t;              # type given on the command line (--type)
my $opt_l;              # --list: show the implemented conversions

# Kept as an ordered list so the specs reach GetOptions in source order.
my @opt_spec = (
    "help|?"            => sub { usage (0) },
    "V|version"         => sub { say "$CMD [$VERSION]"; exit 0 },

    "v|verbose:1"       => \$opt_v,
    "t|type|mimetype=s" => \$opt_t,
    "h|html"            => sub { $opt_h = "html" },
    "l|list!"           => \$opt_l,
);
GetOptions (@opt_spec) or usage (1);

# Echo the invocation (script name plus remaining arguments) when verbose.
say "$0 @ARGV" if $opt_v;
# Conversion table: $fh{OUTPUT}{TYPE}, where OUTPUT is "text" or "html" and
# TYPE is a lower-case file extension or (sub)type name.
#
# A value is one of:
#  - an anonymous list holding all possible commands to show the content.
#    They are candidates in order of preference; the first one found in
#    $PATH is the one that gets used.  A code reference in the list is
#    called with the file name instead of being executed.
#  - a plain string, which is an alias: the name of another TYPE in the
#    same table whose command list is to be used.
#
# %f in a command will be replaced with the file name.  If there is no %f,
# the file name will be the last argument.
#
# Every TYPE must appear only once: in a hash list a repeated key silently
# replaces the earlier entry.
my %fh = (
    text => {
        bin     => [ "strings"                  ],  # fallback for binary files

        txt     => [ "cat"                      ],  # Plain text

        html    => [ "htm2txt",
                     "html2text"                ],  # HTML

        msword  => "doc",
        doc     => [ "catdoc -x -dutf-8",
                     "wvText",
                     "antiword -w 72"           ],  # M$ Word
        "vnd.ms-excel" => "xls",
        "ms-excel"     => "xls",
        docx    => [ "unoconv -f text --stdout" ],  # MS Word
        xlsx    => "xls",
        xls     => [ "xlscat -L",
                     "catdoc -x -dutf-8",
                     "wvText"                   ],  # M$ Excel
#       ppt     => [ "ppthtml"                  ],  # M$ PowerPoint
#                       ppthtml "$1" | html2text

        csv     => "xls",                           # Comma Separated Values

        ics     => [ "ics2txt"                  ],  # ICS calendar request

        # Single rtf entry; the table used to list rtf twice, and the second
        # (shorter) entry discarded the unrtf fallback.
        rtf     => [ "rtf2text",
                     "unrtf -t text"            ],  # RTF
        pdf     => [ "pdftotext %f -"           ],  # Adobe PDF

        ods     => "xls",                           # OpenOffice spreadsheet
        sxc     => "xls",                           # OpenOffice spreadsheet
        odt     => [ "oo2pod %f | pod2text",
                     "ooo2txt"                  ],  # OpenOffice writer

        pl      => [ "perltidy -st -se",
                     "cat"                      ],  # Perl
        pm      => "pl",

        jsn     => [ "json_pp"                  ],  # JSON
        json    => "jsn",
        xml     => [ "xml_pp"                   ],  # XML

        ( map { $_ => "txt" } qw(
            patch diff
            c h ic ec cc
            sh sed awk
            plain
            yml yaml
            )),

        bz2     => [ "bzip2 -d < %f | strings"  ],

        zip     => [ "unzip -l %f"              ],  # ZIP

        # NOTE(review): sub test is not defined in the part of the file
        # visible here - confirm it exists before relying on this entry.
        test    => [ \&test                     ],  # Internal

        # The uncompressed entry is keyed "tar"; it used to be a second
        # "tgz" key that was overwritten by the gzip entry right below it.
        tar     => [ "tar tvf"                  ],  # Tar     uncompressed
        tgz     => [ "tar tzvf"                 ],  # Tar GZ    compressed
        tbz     => [ "tar tjvf"                 ],  # Tar BZip2 compressed
        txz     => [ "tar tJvf"                 ],  # Tar XZ    compressed

        rar     => [ "unrar l"                  ],  # RAR
        },

    html => {
        rtf     => [ "rtf2html"                 ],
        },
    );
# --list: print every type of the text table together with each of its
# candidate commands (one line per command), then stop.
if ($opt_l) {
    my %conv = %{$fh{text}};
    for my $type (sort keys %conv) {
        my $cmds = $conv{$type};
        # A plain string is an alias: show the commands of its target
        $cmds = $conv{$cmds} unless ref $cmds;
        for my $c (@$cmds) {
            printf " .%-12s %s\n", $type, $c;
        }
    }
    exit 0;
}
# The file to show is the first remaining argument.  Test for defined and
# non-empty instead of plain truth, so a file that is literally named "0"
# is not reported as missing.
my $file = shift @ARGV;
defined $file && length $file
         or usage (1, "File argument is missing");
-f $file or usage (1, "File argument is not a plain file");
-r $file or usage (1, "File argument is not a readable file");
-s $file or usage (1, "File argument is an empty file");
# Determine the type key used to look up the converter:
#  1. the lower-cased file name extension,
#  2. overridden by --type when that names a known text conversion,
#  3. otherwise guessed from the description printed by file(1).
my $ext = $file =~ m/\.(\w+)$/ ? lc $1 : "";
$opt_t && exists $fh{text}{lc $opt_t} and $ext = lc $opt_t;
unless (exists $fh{text}{$ext}) {
    # Run file(1) through a list-form pipe open: no shell is involved, so
    # blanks, quotes or other shell meta characters in the file name cannot
    # break or alter the command.  "--" ends the options, so a name with a
    # leading "-" is not taken for one.
    my $ftype = "";
    if (open my $ph, "-|", "file", "--brief", "--", $file) {
        $ftype = do { local $/; <$ph> } // "";
        close $ph;
    }
    $ext =
        $ftype =~ m/^pdf doc/i                                      ? "pdf" :
        $ftype =~ m/^ascii( english)? text/i                        ? "txt" :
        $ftype =~ m/^(utf-8 unicode|iso-\d+)( english)? text/i      ? "txt" :
        $ftype =~ m/^xml doc/i                                      ? "xml" :
                                                                      "bin" ;
    # Everything else is shown as binary, including
    # \w+ compress
    # \w+ archive
    # \w+ image
    # ...
}
$ext ||= "txt";
# Pick the list of candidate commands for this type.  Fall back to the
# text table when the requested table has no entry for the type, and to
# plain text when the type is unknown altogether.
$opt_h = "text" unless exists $fh{$opt_h}{$ext};
$ext   = "txt"  unless exists $fh{$opt_h}{$ext};

my $ref = $fh{$opt_h}{$ext};
# A plain string is an alias for another type in the same table
$ref = $fh{$opt_h}{$ref} unless ref $ref;

warn "[ @$ref ] $file\n" if $opt_v;
# which ($command)
#
# Look up the program of $command in the directories of $ENV{PATH}.
# Only the first whitespace separated word of $command is used, so the
# command may be passed including its arguments.
#
# Returns the full path of the first executable plain file found, or 0 if
# there is none (also when $command is empty or PATH is not set).
sub which {
    my ($cmd) = @_;
    $cmd //= "";
    $cmd =~ s/\s.*//;   # Only the command. Discard arguments here
    length $cmd or return 0;

    foreach my $path (split m/:+/, $ENV{PATH} // "") {
        my $full = "$path/$cmd";
        # -x alone is also true for a searchable directory, so require a
        # plain file as well
        -f $full && -x _ and return $full;
    }
    return 0;
} # which
# Select the first candidate whose program is available in $PATH.
# When none of them is installed, the file is shown using "cat -ve".
my $cmd = "cat -ve";
foreach my $c (@$ref) {
    if (ref $c) {
        # Internal handler: call it with the file name and stop
        $c->($file);
        exit;
    }
    which ($c) or next;
    $cmd = $c;
    last;
}

my @cmd;
if ($cmd =~ m/[|<>]/) {
    # Pipelines and redirections only work when a shell interprets them.
    # The file name is handed over as positional parameter $1 and never
    # pasted into the script text, so blanks and shell meta characters in
    # the name are harmless.
    (my $script = $cmd) =~ s/%f\b/"\$1"/g or $script .= ' "$1"';
    @cmd = ("/bin/sh", "-c", $script, $CMD, $file);
}
else {
    # Simple command: run it directly, without a shell.  %f is replaced
    # with the file name; if there is no %f, the file is the last argument.
    @cmd = split m/ +/ => $cmd;
    my $placed = 0;
    foreach my $arg (@cmd) {
        $arg =~ s/%f\b/$file/ and $placed++;
    }
    $placed or push @cmd, $file;
}
$opt_v and say "@cmd";
exec @cmd or die "$CMD: cannot execute $cmd[0]: $!\n";
|