Text.pm 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867
  1. # Text.pm: output tree as simple text.
  2. #
  3. # Copyright 2010, 2011, 2012, 2013, 2014, 2015, 2016 Free Software Foundation,
  4. # Inc.,
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 3 of the License,
  9. # or (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. #
  19. # Original author: Patrice Dumas <pertusus@free.fr>
  20. package Texinfo::Convert::Text;
  21. use 5.00405;
  22. use strict;
  23. # accent commands list.
  24. use Texinfo::Common;
  25. use Texinfo::Convert::Unicode;
  26. # for debugging
  27. use Texinfo::Convert::Texinfo;
  28. use Data::Dumper;
  29. use Carp qw(cluck carp);
  30. require Exporter;
  31. use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  32. @ISA = qw(Exporter);
  33. # Items to export into callers namespace by default. Note: do not export
  34. # names by default without a very good reason. Use EXPORT_OK instead.
  35. # Do not simply export all your public functions/methods/constants.
  36. # This allows declaration use Texinfo::Convert::Text ':all';
  37. # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
  38. # will save memory.
  39. %EXPORT_TAGS = ( 'all' => [ qw(
  40. convert
  41. ascii_accent
  42. text_accents
  43. ) ] );
  44. @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
  45. @EXPORT = qw(
  46. );
  47. $VERSION = '6.3.90';
  48. # this is in fact not needed for 'footnote', 'shortcaption', 'caption'
  49. # when they have no brace_command_arg, see below.
  50. my %ignored_brace_commands;
  51. foreach my $ignored_brace_command (#'xref','ref','pxref','inforef',
  52. 'anchor',
  53. 'footnote', 'shortcaption', 'caption', 'hyphenation', 'errormsg') {
  54. $ignored_brace_commands{$ignored_brace_command} = 1;
  55. }
  56. my %ignored_block_commands;
  57. foreach my $ignored_command ('titlepage', 'copying', 'documentdescription',
  58. 'html', 'tex', 'xml', 'docbook', 'ignore', 'macro', 'rmacro') {
  59. $ignored_block_commands{$ignored_command} = 1;
  60. }
  61. # used by Texinfo::Convert::NodeNormalization
  62. our %text_brace_no_arg_commands = (
  63. 'TeX' => 'TeX',
  64. 'LaTeX' => 'LaTeX',
  65. 'bullet' => '*',
  66. 'copyright' => '(C)',
  67. 'registeredsymbol' => '(R)',
  68. 'dots' => '...',
  69. 'enddots' => '...',
  70. 'equiv' => '==',
  71. 'error' => 'error-->',
  72. 'expansion' => '==>',
  73. 'arrow' => '->',
  74. 'minus' => '-',
  75. 'point' => '-!-',
  76. 'print' => '-|',
  77. 'result' => '=>',
  78. 'today' => '',
  79. 'aa' => 'aa',
  80. 'AA' => 'AA',
  81. 'ae' => 'ae',
  82. 'oe' => 'oe',
  83. 'AE' => 'AE',
  84. 'OE' => 'OE',
  85. 'o' => '/o',
  86. 'O' => '/O',
  87. 'ss' => 'ss',
  88. 'l' => '/l',
  89. 'L' => '/L',
  90. 'DH' => 'D',
  91. 'dh' => 'd',
  92. 'TH' => 'TH', # http://www.evertype.com/standards/wynnyogh/thorn.html
  93. 'th' => 'th',
  94. 'exclamdown' => '!',
  95. 'questiondown' => '?',
  96. 'pounds' => '#',
  97. 'ordf' => 'a',
  98. 'ordm' => 'o',
  99. 'comma' => ',',
  100. 'atchar' => '@',
  101. 'lbracechar' => '{',
  102. 'rbracechar' => '}',
  103. 'backslashchar' => '\\',
  104. 'hashchar' => '#',
  105. 'euro' => 'Euro',
  106. 'geq' => '>=',
  107. 'leq' => '<=',
  108. 'tie' => ' ',
  109. 'textdegree' => 'o',
  110. 'quotedblleft' => '``',
  111. 'quotedblright' => "''",
  112. 'quoteleft' => '`',
  113. 'quoteright' => "'",
  114. 'quotedblbase' => ',,',
  115. 'quotesinglbase' => ',',
  116. 'guillemetleft' => '<<',
  117. 'guillemetright' => '>>',
  118. 'guillemotleft' => '<<',
  119. 'guillemotright' => '>>',
  120. 'guilsinglleft' => '<',
  121. 'guilsinglright' => '>',
  122. 'click' => '', # specially treated
  123. );
  124. my %sort_brace_no_arg_commands = (
  125. 'copyright' => 'C',
  126. 'registeredsymbol' => 'R',
  127. 'today' => 't',
  128. );
  129. foreach my $accent_letter ('o','O','l','L') {
  130. $sort_brace_no_arg_commands{$accent_letter} = $accent_letter;
  131. }
  132. my %accent_commands = %Texinfo::Common::accent_commands;
  133. my %no_brace_commands = %Texinfo::Common::no_brace_commands;
  134. our %formatting_misc_commands;
  135. foreach my $command ('verbatiminclude', 'sp', 'center', 'exdent',
  136. 'item', 'itemx', 'tab', 'headitem',
  137. 'node', keys(%Texinfo::Common::sectioning_commands)) {
  138. $formatting_misc_commands{$command} = 1;
  139. }
  140. my %ignored_types;
  141. foreach my $type ('empty_line_after_command', 'preamble',
  142. 'empty_spaces_after_command', 'spaces_at_end',
  143. 'empty_spaces_before_argument', 'empty_spaces_before_paragraph',
  144. 'empty_spaces_after_close_brace') {
  145. $ignored_types{$type} = 1;
  146. }
  147. sub ascii_accent($$)
  148. {
  149. my $text = shift;
  150. my $command = shift;
  151. my $accent = $command->{'cmdname'};
  152. return $text if ($accent eq 'dotless');
  153. return $text . "''" if ($accent eq 'H');
  154. return $text . '.' if ($accent eq 'dotaccent');
  155. return $text . '*' if ($accent eq 'ringaccent');
  156. return $text . '[' if ($accent eq 'tieaccent');
  157. return $text . '(' if ($accent eq 'u');
  158. return $text . '_' if ($accent eq 'ubaraccent');
  159. return '.' . $text if ($accent eq 'udotaccent');
  160. return $text . '<' if ($accent eq 'v');
  161. return $text . ';' if ($accent eq 'ogonek');
  162. return $text . $accent;
  163. }
  164. # format a stack of accents as ascii
  165. sub ascii_accents($$;$)
  166. {
  167. my $result = shift;
  168. my $stack = shift;
  169. my $set_case = shift;
  170. if ($set_case and $result =~ /^\w$/) {
  171. if ($set_case > 0) {
  172. $result = uc($result);
  173. } else {
  174. $result = lc($result);
  175. }
  176. }
  177. foreach my $accent_command (reverse(@$stack)) {
  178. $result = ascii_accent($result, $accent_command);
  179. }
  180. return $result;
  181. }
  182. # Same as ascii_accent, but with a converter as first argument to be consistent
  183. # with calling conventions of fallback accent formatting functions given
  184. # to convert_accents/encoded_accents
  185. sub ascii_accent_fallback($$$)
  186. {
  187. my $converter = shift;
  188. my $text = shift;
  189. my $command = shift;
  190. return ascii_accent($text, $command);
  191. }
  192. # format an accent command and nested accents within as Text.
  193. sub text_accents($;$$)
  194. {
  195. my $accent = shift;
  196. my $encoding = shift;
  197. my $set_case = shift;
  198. my ($contents, $stack)
  199. = Texinfo::Common::find_innermost_accent_contents($accent);
  200. my $options = {};
  201. $options->{'enabled_encoding'} = $encoding if (defined($encoding));
  202. $options->{'sc'} = $set_case if (defined($set_case));
  203. my $text = convert({'contents' => $contents}, $options);
  204. my $result = Texinfo::Convert::Unicode::encoded_accents(undef, $text,
  205. $stack, $encoding, \&ascii_accent_fallback, $set_case);
  206. if (defined($result)) {
  207. return $result;
  208. } else {
  209. return ascii_accents($text, $stack, $set_case);
  210. }
  211. }
  212. sub brace_no_arg_command($;$)
  213. {
  214. my $root = shift;
  215. my $options = shift;
  216. my $encoding;
  217. $encoding = $options->{'enabled_encoding'}
  218. if ($options and $options->{'enabled_encoding'});
  219. my $command = $root->{'cmdname'};
  220. $command = $root->{'extra'}->{'clickstyle'}
  221. if ($root->{'extra'}
  222. and defined($root->{'extra'}->{'clickstyle'})
  223. and defined($text_brace_no_arg_commands{$root->{'extra'}->{'clickstyle'}}));
  224. my $result = Texinfo::Convert::Unicode::unicode_for_brace_no_arg_command(
  225. $command, $encoding);
  226. if (!defined($result and $options and $options->{'converter'})) {
  227. my $tree = Texinfo::Common::translated_command_tree(
  228. $options->{'converter'}, $command);
  229. if ($tree) {
  230. $result = _convert($tree, $options);
  231. }
  232. }
  233. if (!defined($result)) {
  234. if ($options and $options->{'sort_string'}
  235. and $sort_brace_no_arg_commands{$command}) {
  236. $result = $sort_brace_no_arg_commands{$command};
  237. } else {
  238. $result = $text_brace_no_arg_commands{$command};
  239. }
  240. }
  241. if ($options and $Texinfo::Common::letter_no_arg_commands{$command}) {
  242. if ($options->{'sc'}) {
  243. $result = uc($result);
  244. } elsif ($options->{'lc'}) {
  245. $result = lc($result);
  246. }
  247. }
  248. return $result;
  249. }
  250. my %underline_symbol = (
  251. 0 => '*',
  252. 1 => '*',
  253. 2 => '=',
  254. 3 => '-',
  255. 4 => '.'
  256. );
  257. # Return the text of an underlined heading, possibly indented.
  258. sub heading($$$;$$)
  259. {
  260. my $current = shift;
  261. my $text = shift;
  262. my $converter = shift;
  263. my $numbered = shift;
  264. my $indent_length = shift;
  265. # REMARK to get the numberig right in case of an indented text, the
  266. # indentation should be given here. But this should never happen as
  267. # the only @-commands allowed in indented context are not number.
  268. $text = Texinfo::Common::numbered_heading($converter, $current, $text,
  269. $numbered);
  270. return '' if ($text !~ /\S/);
  271. my $result = $text ."\n";
  272. if (defined($indent_length)) {
  273. if ($indent_length < 0) {
  274. $indent_length = 0;
  275. }
  276. $result .= (' ' x $indent_length);
  277. } else {
  278. $indent_length = 0;
  279. }
  280. $result .=($underline_symbol{$current->{'level'}}
  281. x (Texinfo::Convert::Unicode::string_width($text) - $indent_length))."\n";
  282. return $result;
  283. }
  284. sub _code_options($)
  285. {
  286. my $options = shift;
  287. my $code_options;
  288. if (defined($options)) {
  289. $code_options = { %$options };
  290. } else {
  291. $code_options = {};
  292. }
  293. $code_options->{'code'} = 1;
  294. return $code_options;
  295. }
  296. sub convert($;$)
  297. {
  298. my $root = shift;
  299. # means it was called object oriented
  300. if (ref($root) ne 'HASH') {
  301. if (ref($root) eq 'ARRAY') {
  302. carp ("convert argument $root not blessed reference or HASH");
  303. return undef;
  304. }
  305. $root = shift;
  306. }
  307. my $options = shift;
  308. #print STDERR "CONVERT\n";
  309. return _convert($root, $options);
  310. }
  311. sub _convert($;$);
  312. sub _convert($;$)
  313. {
  314. my $root = shift;
  315. my $options = shift;
  316. return '' if (!($root->{'type'} and $root->{'type'} eq 'def_line')
  317. and (($root->{'type'} and $ignored_types{$root->{'type'}})
  318. or ($root->{'cmdname'}
  319. and ($ignored_brace_commands{$root->{'cmdname'}}
  320. or ($ignored_block_commands{$root->{'cmdname'}}
  321. and !(defined($options->{'expanded_formats_hash'})
  322. and $options->{'expanded_formats_hash'}->{$root->{'cmdname'}}))
  323. or ($Texinfo::Common::inline_commands{$root->{'cmdname'}}
  324. and $root->{'cmdname'} ne 'inlinefmtifelse'
  325. and (($Texinfo::Common::inline_format_commands{$root->{'cmdname'}}
  326. and (!$root->{'extra'}->{'format'}
  327. or !$options->{'expanded_formats_hash'}->{$root->{'extra'}->{'format'}}))
  328. or (!$Texinfo::Common::inline_format_commands{$root->{'cmdname'}}
  329. and !defined($root->{'extra'}->{'expand_index'}))))
  330. # here ignore most of the misc commands
  331. or ($root->{'args'} and $root->{'args'}->[0]
  332. and $root->{'args'}->[0]->{'type'}
  333. and ($root->{'args'}->[0]->{'type'} eq 'misc_line_arg'
  334. or $root->{'args'}->[0]->{'type'} eq 'misc_arg')
  335. and !$formatting_misc_commands{$root->{'cmdname'}})))));
  336. my $result = '';
  337. if (defined($root->{'text'})) {
  338. if ($root->{'type'} and $root->{'type'} eq 'untranslated'
  339. and $options and $options->{'converter'}) {
  340. my $save_lang = $options->{'converter'}->get_conf('documentlanguage');
  341. $options->{'converter'}->{'documentlanguage'}
  342. = $root->{'extra'}->{'documentlanguage'};
  343. my $tree = Texinfo::Report::gdt($options->{'converter'},
  344. $root->{'text'});
  345. $result = _convert($tree, $options);
  346. $options->{'converter'}->{'documentlanguage'} = $save_lang;
  347. } else {
  348. $result = $root->{'text'};
  349. if ((! defined($root->{'type'})
  350. or $root->{'type'} ne 'raw')
  351. and !$options->{'raw'}) {
  352. if ($options->{'sc'}) {
  353. $result = uc($result);
  354. }
  355. if (!$options->{'code'}) {
  356. $result =~ s/``/"/g;
  357. $result =~ s/\'\'/"/g;
  358. $result =~ s/---/\x{1F}/g;
  359. $result =~ s/--/-/g;
  360. $result =~ s/\x{1F}/--/g;
  361. }
  362. }
  363. }
  364. }
  365. if ($root->{'cmdname'}) {
  366. my $command = $root->{'cmdname'};
  367. if (defined($no_brace_commands{$root->{'cmdname'}})) {
  368. return $no_brace_commands{$root->{'cmdname'}};
  369. } elsif ($root->{'cmdname'} eq 'today') {
  370. if ($options->{'sort_string'}
  371. and $sort_brace_no_arg_commands{$root->{'cmdname'}}) {
  372. return $sort_brace_no_arg_commands{$root->{'cmdname'}};
  373. } elsif ($options->{'converter'}) {
  374. return _convert(Texinfo::Common::expand_today($options->{'converter'}),
  375. $options);
  376. } elsif ($options->{'TEST'}) {
  377. return 'a sunny day';
  378. } else {
  379. my($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst)
  380. = localtime(time);
  381. $year += ($year < 70) ? 2000 : 1900;
  382. return "$Texinfo::Common::MONTH_NAMES[$mon] $mday, $year";
  383. }
  384. } elsif (defined($text_brace_no_arg_commands{$root->{'cmdname'}})) {
  385. return brace_no_arg_command($root, $options);
  386. # commands with braces
  387. } elsif ($accent_commands{$root->{'cmdname'}}) {
  388. my $result = text_accents ($root, $options->{'enabled_encoding'},
  389. $options->{'sc'});
  390. return $result;
  391. } elsif ($root->{'cmdname'} eq 'image') {
  392. return _convert($root->{'args'}->[0], _code_options($options));
  393. } elsif ($root->{'cmdname'} eq 'email') {
  394. my $mail = _convert($root->{'args'}->[0], _code_options($options));
  395. my $text;
  396. $text = _convert($root->{'args'}->[1], $options)
  397. if (defined($root->{'args'}->[1]));
  398. return $text if (defined($text) and ($text ne ''));
  399. return $mail;
  400. } elsif ($root->{'cmdname'} eq 'uref' or $root->{'cmdname'} eq 'url') {
  401. my $replacement;
  402. $replacement = _convert($root->{'args'}->[2], $options)
  403. if (defined($root->{'args'}->[2]));
  404. return $replacement if (defined($replacement) and $replacement ne '');
  405. my $text;
  406. $text = _convert($root->{'args'}->[1], $options)
  407. if (defined($root->{'args'}->[1]));
  408. my $url = _convert($root->{'args'}->[0], _code_options($options));
  409. if (defined($text) and $text ne '') {
  410. return "$url ($text)";
  411. } else {
  412. return $url;
  413. }
  414. } elsif ($Texinfo::Common::explained_commands{$root->{'cmdname'}}
  415. and $root->{'args'} and $root->{'args'}->[1]) {
  416. my $explanation = _convert($root->{'args'}->[1], $options);
  417. if ($explanation ne '') {
  418. return _convert($root->{'args'}->[0], $options) ." ($explanation)";
  419. } else {
  420. return _convert($root->{'args'}->[0], $options);
  421. }
  422. } elsif ($Texinfo::Common::inline_commands{$root->{'cmdname'}}) {
  423. $options->{'raw'} = 1 if ($root->{'cmdname'} eq 'inlineraw');
  424. my $arg_index = 1;
  425. if ($root->{'cmdname'} eq 'inlinefmtifelse'
  426. and (!$root->{'extra'}->{'format'}
  427. or !$options->{'expanded_formats_hash'}->{$root->{'extra'}->{'format'}})) {
  428. $arg_index = 2;
  429. }
  430. if (scalar(@{$root->{'args'}}) > $arg_index) {
  431. return _convert($root->{'args'}->[$arg_index], $options);
  432. } else {
  433. return '';
  434. }
  435. } elsif ($root->{'args'} and $root->{'args'}->[0]
  436. and (($root->{'args'}->[0]->{'type'}
  437. and $root->{'args'}->[0]->{'type'} eq 'brace_command_arg')
  438. or $root->{'cmdname'} eq 'math')) {
  439. my $result;
  440. if ($root->{'cmdname'} eq 'sc') {
  441. $options = {%$options, 'sc' => 1};
  442. } elsif ($Texinfo::Common::code_style_commands{$root->{'cmdname'}}
  443. or $root->{'cmdname'} eq 'math') {
  444. $options = _code_options($options);
  445. }
  446. $result = _convert($root->{'args'}->[0], $options);
  447. return $result;
  448. # block commands
  449. } elsif ($root->{'cmdname'} eq 'quotation'
  450. or $root->{'cmdname'} eq 'smallquotation'
  451. or $root->{'cmdname'} eq 'float') {
  452. if ($root->{'args'}) {
  453. foreach my $arg (@{$root->{'args'}}) {
  454. my $converted_arg = _convert($arg, $options);
  455. if ($converted_arg =~ /\S/) {
  456. $result .= $converted_arg.", ";
  457. }
  458. }
  459. $result =~ s/, $//;
  460. chomp ($result);
  461. $result .= "\n" if ($result =~ /\S/);
  462. }
  463. } elsif ($options->{'expanded_formats_hash'}->{$root->{'cmdname'}}) {
  464. $options->{'raw'} = 1;
  465. } elsif ($formatting_misc_commands{$root->{'cmdname'}} and $root->{'args'}) {
  466. if ($root->{'cmdname'} eq 'sp') {
  467. if ($root->{'extra'} and $root->{'extra'}->{'misc_args'}
  468. and $root->{'extra'}->{'misc_args'}->[0]) {
  469. # this useless copy avoids perl changing the type to integer!
  470. my $sp_nr = $root->{'extra'}->{'misc_args'}->[0];
  471. $result = "\n" x $sp_nr;
  472. }
  473. } elsif ($root->{'cmdname'} eq 'verbatiminclude') {
  474. my $verbatim_include_verbatim
  475. = Texinfo::Common::expand_verbatiminclude($options->{'converter'},
  476. $root);
  477. if (defined($verbatim_include_verbatim)) {
  478. $result .= _convert($verbatim_include_verbatim, $options);
  479. }
  480. } elsif ($root->{'cmdname'} ne 'node') {
  481. $result = _convert($root->{'args'}->[0], $options);
  482. if ($Texinfo::Common::sectioning_commands{$root->{'cmdname'}}) {
  483. $result = heading($root, $result, $options->{'converter'},
  484. $options->{'NUMBER_SECTIONS'});
  485. } else {
  486. # we always want an end of line even if is was eaten by a command
  487. chomp($result);
  488. $result .= "\n";
  489. }
  490. }
  491. } elsif ($root->{'cmdname'} eq 'item'
  492. and $root->{'parent'}->{'cmdname'}
  493. and $root->{'parent'}->{'cmdname'} eq 'enumerate') {
  494. $result .= Texinfo::Common::enumerate_item_representation(
  495. $root->{'parent'}->{'extra'}->{'enumerate_specification'},
  496. $root->{'extra'}->{'item_number'}) . '. ';
  497. }
  498. }
  499. if ($root->{'type'} and $root->{'type'} eq 'def_line') {
  500. #print STDERR "$root->{'extra'}->{'def_command'}\n";
  501. if ($root->{'extra'} and $root->{'extra'}->{'def_args'}
  502. and @{$root->{'extra'}->{'def_args'}}) {
  503. my $parsed_definition_category
  504. = Texinfo::Common::definition_category ($options->{'converter'}, $root);
  505. my @contents = ($parsed_definition_category, {'text' => ': '});
  506. if ($root->{'extra'}->{'def_parsed_hash'}->{'type'}) {
  507. push @contents, ($root->{'extra'}->{'def_parsed_hash'}->{'type'},
  508. {'text' => ' '});
  509. }
  510. push @contents, $root->{'extra'}->{'def_parsed_hash'}->{'name'};
  511. my $arguments = Texinfo::Common::definition_arguments_content($root);
  512. if ($arguments) {
  513. push @contents, {'text' => ' '};
  514. push @contents, @$arguments;
  515. }
  516. push @contents, {'text' => "\n"};
  517. $result = _convert({'contents' => \@contents}, _code_options($options));
  518. }
  519. #$result = convert($root->{'args'}->[0], $options) if ($root->{'args'});
  520. } elsif ($root->{'type'} and $root->{'type'} eq 'menu_entry') {
  521. foreach my $arg (@{$root->{'args'}}) {
  522. if ($arg->{'type'} eq 'menu_entry_node') {
  523. $result .= _convert($arg, _code_options($options));
  524. } else {
  525. $result .= _convert($arg, $options);
  526. }
  527. }
  528. if (!$root->{'parent'}->{'type'}
  529. or ($root->{'parent'}->{'type'} ne 'preformatted'
  530. and $root->{'parent'}->{'type'} ne 'rawpreformatted')) {
  531. chomp($result);
  532. $result .= "\n";
  533. }
  534. }
  535. if ($root->{'contents'}) {
  536. if ($root->{'cmdname'}
  537. and $Texinfo::Common::preformatted_code_commands{$root->{'cmdname'}}) {
  538. $options = _code_options($options);
  539. }
  540. if (ref($root->{'contents'}) ne 'ARRAY') {
  541. cluck "contents not an array($root->{'contents'}).";
  542. }
  543. foreach my $content (@{$root->{'contents'}}) {
  544. $result .= _convert($content, $options);
  545. }
  546. }
  547. $result = '{'.$result.'}'
  548. if ($root->{'type'} and $root->{'type'} eq 'bracketed'
  549. and (!$root->{'parent'}->{'type'} or
  550. ($root->{'parent'}->{'type'} ne 'block_line_arg'
  551. and $root->{'parent'}->{'type'} ne 'misc_line_arg')));
  552. #print STDERR " RR ($root) -> $result\n";
  553. return $result;
  554. }
  555. # Implement the converters API, but as simply as possible
  556. # initialization
  557. sub converter($)
  558. {
  559. my $class = shift;
  560. my $conf;
  561. my $converter = {};
  562. if (ref($class) eq 'HASH') {
  563. $conf = $class;
  564. bless $converter;
  565. } elsif (defined($class)) {
  566. bless $converter, $class;
  567. $conf = shift;
  568. } else {
  569. bless $converter;
  570. $conf = shift;
  571. }
  572. if ($conf) {
  573. %{$converter} = %{$conf};
  574. }
  575. my $expanded_formats = $converter->{'expanded_formats'};;
  576. if ($converter->{'parser'}) {
  577. $converter->{'info'} = $converter->{'parser'}->global_informations();
  578. $converter->{'extra'} = $converter->{'parser'}->global_commands_information();
  579. foreach my $global_command ('documentencoding') {
  580. if (defined($converter->{'extra'}->{$global_command})) {
  581. my $root = $converter->{'extra'}->{$global_command}->[0];
  582. if ($global_command eq 'documentencoding'
  583. and defined($root->{'extra'})
  584. and defined($root->{'extra'}->{'input_perl_encoding'})) {
  585. $converter->{'OUTPUT_ENCODING_NAME'}
  586. = $root->{'extra'}->{'input_encoding_name'};
  587. $converter->{'OUTPUT_PERL_ENCODING'}
  588. = $root->{'extra'}->{'input_perl_encoding'};
  589. }
  590. }
  591. }
  592. if (!$expanded_formats and $converter->{'parser'}->{'expanded_formats'}) {
  593. $expanded_formats = $converter->{'parser'}->{'expanded_formats'};
  594. }
  595. }
  596. if ($expanded_formats) {
  597. foreach my $expanded_format(@$expanded_formats) {
  598. $converter->{'expanded_formats_hash'}->{$expanded_format} = 1;
  599. }
  600. }
  601. bless $converter;
  602. return $converter;
  603. }
  604. sub convert_tree($$)
  605. {
  606. my $self = shift;
  607. my $root = shift;
  608. return _convert($root);
  609. }
  610. # determine outfile and output to that file
  611. my $STDIN_DOCU_NAME = 'stdin';
  612. sub output($$)
  613. {
  614. my $self = shift;
  615. my $tree = shift;
  616. #print STDERR "OUTPUT\n";
  617. my $input_basename;
  618. if (defined($self->{'info'}->{'input_file_name'})) {
  619. my ($directories, $suffix);
  620. ($input_basename, $directories, $suffix)
  621. = fileparse($self->{'info'}->{'input_file_name'});
  622. } else {
  623. # This could happen if called on a piece of texinfo
  624. $input_basename = '';
  625. }
  626. $self->{'input_basename'} = $input_basename;
  627. $input_basename = $STDIN_DOCU_NAME if ($input_basename eq '-');
  628. $input_basename =~ s/\.te?x(i|info)?$//;
  629. my $setfilename;
  630. $setfilename = $self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'}
  631. if ($self->{'extra'} and $self->{'extra'}->{'setfilename'}
  632. and $self->{'extra'}->{'setfilename'}->{'extra'}
  633. and defined($self->{'extra'}->{'setfilename'}->{'extra'}->{'text_arg'}));
  634. my $outfile;
  635. if (!defined($self->{'OUTFILE'})) {
  636. if (defined($setfilename)) {
  637. $outfile = $setfilename;
  638. $outfile =~ s/\.[^\.]*$//;
  639. } elsif ($input_basename ne '') {
  640. $outfile = $input_basename;
  641. }
  642. if (defined($outfile)) {
  643. $outfile .= '.txt';
  644. }
  645. } else {
  646. $outfile = $self->{'OUTFILE'};
  647. }
  648. my $fh;
  649. if (defined($outfile)) {
  650. $fh = $self->Texinfo::Common::open_out($outfile);
  651. return undef if (!$fh);
  652. }
  653. my %options = $self->Texinfo::Common::_convert_text_options();
  654. my $result = _convert($tree, \%options);
  655. if ($fh) {
  656. print $fh $result;
  657. return undef if (!close($fh));
  658. $result = '';
  659. }
  660. return $result;
  661. }
  662. sub get_conf($$)
  663. {
  664. my $self = shift;
  665. my $key = shift;
  666. return $self->{$key};
  667. }
  668. sub errors()
  669. {
  670. return undef;
  671. }
  672. sub converter_unclosed_files()
  673. {
  674. return undef;
  675. }
  676. sub converter_opened_files()
  677. {
  678. return ();
  679. }
  680. sub converter_defaults()
  681. {
  682. return ();
  683. }
  684. 1;
  685. __END__
  686. =head1 NAME
  687. Texinfo::Convert::Text - Convert Texinfo tree to simple text
  688. =head1 SYNOPSIS
  689. use Texinfo::Convert::Text qw(convert ascii_accent text_accents);
  690. my $result = convert($tree);
  691. my $result_encoded = convert($tree,
  692. {'enabled_encoding' => 'utf-8'});
  693. my $result_converter = convert($tree,
  694. {'converter' => $converter});
  695. my $result_accent_text = ascii_accent('e', $accent_command);
  696. my $accents_text = text_accents($accents, 'utf-8');
  697. =head1 DESCRIPTION
  698. Texinfo::Convert::Text is a simple backend that converts a Texinfo tree
  699. to simple text. It is used for some command argument expansion in
  700. C<Texinfo::Parser>, for instance the file names, or encoding names.
  701. The converter is very simple, and, in the default case, cannot handle
  702. output strings translation or error handling.
  703. =head1 METHODS
  704. =over
  705. =item $result = convert($tree, $options)
  706. Convert a Texinfo tree to simple text. I<$options> is a hash reference of
  707. options. The converter is very simple, and has no internal state besides
  708. the options. It cannot handle as is output strings translation or error
  709. storing.
  710. If the I<converter> option is set, some additional features may be available
  711. for the conversion of some @-commands, like output strings translation or
  712. error reporting.
  713. The following options may be set:
  714. =over
  715. =item enabled_encoding
  716. If set, the value is considered to be the encoding name texinfo accented
  717. letters should be converted to. This option corresponds to the
  718. C<--enable-encoding> option, or the C<ENABLE_ENCODING> customization
  719. variable.
  720. =item sc
  721. If set, the text is upper-cased.
  722. =item code
  723. If set the text is in code style. (mostly --, ---, '' and `` are kept as
  724. is).
  725. =item NUMBER_SECTIONS
  726. If set, sections are numbered when output.
  727. =item sort_string
  728. A somehow internal option to convert to text more suitable for alphabetical
  729. sorting rather than presentation.
  730. =item converter
  731. If this converter object is passed to the function, some features of this
  732. object may be used during conversion. Mostly error reporting and strings
  733. translation, as the converter object is also supposed to be a
  734. L<Texinfo::Report> objet. See also L<Texinfo::Convert::Converter>.
  735. =item expanded_formats_hash
  736. A reference on a hash. The keys should be format names (like C<html>,
  737. C<tex>), and if thecorresponding value is set, the format is expanded.
  738. =back
  739. =item $result_accent_text = ascii_accent($text, $accent_command)
  740. I<$text> is the text appearing within an accent command. I<$accent_command>
  741. should be a Texinfo tree element corresponding to an accent command taking
  742. an argument. The function returns a transliteration of the accented
  743. character.
  744. =item $result_accent_text = ascii_accent_fallback($converter, $text, $accent_command)
  745. Same as C<ascii_accent> but with an additional first argument
  746. converter, which is in ignored, but needed if this function is to
  747. be in argument of functions that need a fallback for accents
  748. conversion.
  749. =item $accents_text = text_accents($accents, $encoding, $set_case)
  750. I<$accents> is an accent command that may contain other nested accent
  751. commands. The function will format the whole stack of nested accent
  752. commands and the innermost text. If I<$encoding> is set, the formatted
  753. text is converted to this encoding as much as possible instead of being
  754. converted as simple ascii. If I<$set_case> is positive, the result
  755. is meant to be upper-cased, if it is negative, the result is to be
  756. lower-cased.
  757. =back
  758. =head1 AUTHOR
  759. Patrice Dumas, E<lt>pertusus@free.frE<gt>
  760. =head1 COPYRIGHT AND LICENSE
  761. Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
  762. This library is free software; you can redistribute it and/or modify
  763. it under the terms of the GNU General Public License as published by
  764. the Free Software Foundation; either version 3 of the License,
  765. or (at your option) any later version.
  766. =cut