mthl
/
texinfo


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
							/* Copyright 2010, 2011, 2012, 2013, 2014, 2015
   Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */

#define _GNU_SOURCE

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <iconv.h>
#include <errno.h>
#include <sys/stat.h>

#include "tree_types.h"
#include "input.h"
#include "text.h"
#include "api.h"

enum input_type { IN_file, IN_text };

enum character_encoding {
    ce_latin1,
    ce_latin2,
    ce_utf8,
    ce_shiftjis
};

typedef struct {
    enum input_type type;

    FILE *file;
    char *input_encoding;
    LINE_NR line_nr;

    char *text;  /* Input text to be parsed as Texinfo. */
    char *ptext; /* How far we are through 'text'.  Used to split 'text'
                    into lines. */
} INPUT;

static INPUT *input_stack = 0;
int input_number = 0;
int input_space = 0;

/* Current filename and line number.  Used for reporting. */
LINE_NR line_nr;

// 1961
/* Collect text from the input sources until a newline is found.  This is used 
   instead of next_text when we need to be sure we get an entire line of 
   Texinfo input (for example as a line argument to a command), which might not 
   be the case if the input is the result of a macro expansion.

   Return value should not be freed by caller, and becomes invalid after
   a subsequent call. */
char *
new_line (void)
{
  static TEXT t;
  char *new = 0;

  t.end = 0;

  while (1)
    {
      new = next_text ();
      if (!new)
        break;
      text_append (&t, new);
      free (new);

      if (t.text[t.end - 1] == '\n')
        break;
    }

  if (t.end > 0)
    return t.text;
  else
    return 0;
}


/* TODO: integrate with gnulib */
#define ICONV_CONST

static iconv_t iconv_from_latin1 = (iconv_t) 0;
static iconv_t iconv_from_latin2;
static iconv_t iconv_from_shiftjis;

/* Run iconv using text buffer as output buffer. */
size_t
text_buffer_iconv (TEXT *buf, iconv_t iconv_state,
                   ICONV_CONST char **inbuf, size_t *inbytesleft)
{
  size_t out_bytes_left;
  char *outptr;
  size_t iconv_ret;

  outptr = buf->text + buf->end;
  if (buf->end == buf->space - 1)
    {
      errno = E2BIG;
      return (size_t) -1;
    }
  out_bytes_left = buf->space - buf->end - 1;
  iconv_ret = iconv (iconv_state, inbuf, inbytesleft,
                     &outptr, &out_bytes_left);

  buf->end = outptr - buf->text;

  return iconv_ret;
}


/* Return conversion of S according to ENC.  This function frees S. */
static char *
convert_to_utf8 (char *s, char *input_encoding)
{
  iconv_t our_iconv;
  static TEXT t;
  char *inptr; size_t bytes_left;
  size_t iconv_ret;
  enum character_encoding enc;

  /* Convert from @documentencoding to UTF-8.
       It might be possible not to convert to UTF-8 and use an 8-bit encoding
     throughout, but then we'd have to not set the UTF-8 flag on the Perl 
     strings in api.c.  If multiple character encodings were used in a single 
     file, then we'd have to keep track of which strings needed the UTF-8 flag
     and which didn't. */

  /* Could and check for malformed input: see
     <http://savannah.gnu.org/bugs/?42896>. */

  if (iconv_from_latin1 == (iconv_t) 0)
    {
      /* Initialize the conversion for the first time. */
      iconv_from_latin1 = iconv_open ("UTF-8", "ISO-8859-1");
      if (iconv_from_latin1 == (iconv_t) -1)
        {
          abort ();

          /* big trouble.  if we do return it unconverted, we will have to
             remember not to set the UTF-8 flags on the Perl strings, otherwise
             Perl will choke. */
          return s;
        }
    }
  if (iconv_from_latin2 == (iconv_t) 0)
    {
      /* Initialize the conversion for the first time. */
      iconv_from_latin2 = iconv_open ("UTF-8", "ISO-8859-2");
      if (iconv_from_latin2 == (iconv_t) -1)
        iconv_from_latin2 = iconv_from_latin1;
    }
  if (iconv_from_shiftjis == (iconv_t) 0)
    {
      /* Initialize the conversion for the first time. */
      iconv_from_shiftjis = iconv_open ("UTF-8", "SHIFT-JIS");
      if (iconv_from_shiftjis == (iconv_t) -1)
        iconv_from_shiftjis = iconv_from_latin1;
    }

  enc = ce_latin1;
  if (!input_encoding)
    ;
  else if (!strcmp (input_encoding, "utf-8"))
    enc = ce_utf8;
  else if (!strcmp (input_encoding, "iso-8859-2"))
    enc = ce_latin2;
  else if (!strcmp (input_encoding, "shift_jis"))
    enc = ce_shiftjis;

  switch (enc)
    {
    case ce_utf8:
      return s; /* no conversion required. */
      break;
    case ce_latin1:
      our_iconv = iconv_from_latin1;
      break;
    case ce_latin2:
      our_iconv = iconv_from_latin2;
      break;
    case ce_shiftjis:
      our_iconv = iconv_from_shiftjis;
      break;
    }

  t.end = 0;
  inptr = s;
  bytes_left = strlen (s);
  text_alloc (&t, 10);

  while (1)
    {
      iconv_ret = text_buffer_iconv (&t, our_iconv,
                                     &inptr, &bytes_left);

      /* Make sure libiconv flushes out the last converted character.
         This is required when the conversion is stateful, in which
         case libiconv might not output the last character, waiting to
         see whether it should be combined with the next one.  */
      if (iconv_ret != (size_t) -1
          && text_buffer_iconv (&t, our_iconv, 0, 0) != (size_t) -1)
        /* Success: all of input converted. */
        break;

      switch (errno)
        {
        case E2BIG:
          text_alloc (&t, t.space + 20);
          break;
        default:
          abort ();
          break;
        }
    }

  free (s);
  t.text[t.end] = '\0';
  //fprintf (stderr, "CONVERTED STRING IS <<%s>>", t.text);
  return strdup (t.text);
}

int
expanding_macro (char *macro)
{
  int i;
  for (i = 0; i < input_number; i++)
    {
      if (input_stack[i].line_nr.macro
          && !strcmp (input_stack[i].line_nr.macro, macro))
        {
          return 1;
        }
    }
  return 0;
}

/* Return value to be freed by caller.  Return null if we are out of input. */
char *
next_text (void)
{
  ssize_t status;
  char *line = 0;
  size_t n;
  FILE *input_file;

  while (input_number > 0)
    {
      /* Check for pending input. */
      INPUT *i = &input_stack[input_number - 1];

      switch (i->type)
        {
          char *p, *new;
        case IN_text:
          if (!*i->ptext)
            {
              /* End of text reached. */
              free (i->text);
              break;
            }
          /* Split off a line of input. */
          p = strchrnul (i->ptext, '\n');
          new = strndup (i->ptext, p - i->ptext + 1);
          if (*p)
            i->ptext = p + 1;
          else
            i->ptext = p; /* The next time, we will pop the input source. */

          if (!i->line_nr.macro)
            i->line_nr.line_nr++;

          line_nr = i->line_nr;

          return convert_to_utf8 (new, 0); // i->input_encoding);

          break;
        case IN_file: // 1911
          input_file = input_stack[input_number - 1].file;
          status = getline (&line, &n, input_file);
          while (status != -1)
            {
              char *comment;
              if (feof (input_file))
                {
                  /* Add a newline at the end of the file if one is missing. */
                  char *line2;
                  asprintf (&line2, "%s\n", line);
                  free (line);
                  line = line2;
                }

              /* Strip off a comment. */
              comment = strchr (line, '\x7F');
              if (comment)
                *comment = '\0';

              // 1920 CPP_LINE_DIRECTIVES

              i->line_nr.line_nr++;
              line_nr = i->line_nr;

              return convert_to_utf8 (line, i->input_encoding);
            }
          free (line); line = 0;
          break;
        default:
          abort ();
        }

      /* Top input source failed.  Pop it and try the next one. */
      
      if (input_stack[input_number - 1].type == IN_file)
        {
          FILE *file = input_stack[input_number - 1].file;

          if (file != stdin)
            {
              if (fclose (input_stack[input_number - 1].file) == EOF)
                abort (); // error
            }
        }
      input_number--;
    }
  return 0;
}

void
input_push (char *text, char *macro, char *filename, int line_number)
{
  if (input_number == input_space)
    {
      input_space++; input_space *= 1.5;
      input_stack = realloc (input_stack, input_space * sizeof (INPUT));
      if (!input_stack)
        abort ();
    }

  input_stack[input_number].type = IN_text;
  input_stack[input_number].file = 0;
  input_stack[input_number].text = text;
  input_stack[input_number].ptext = text;
  input_stack[input_number].input_encoding = 0;

  if (!macro)
    line_number--;
  input_stack[input_number].line_nr.line_nr = line_number;
  input_stack[input_number].line_nr.file_name = filename;
  input_stack[input_number].line_nr.macro = macro;
  input_number++;
}

/* Store TEXT as a source for Texinfo content.  TEXT will be later free'd
   and must be allocated on the heap.  MACRO is the name of a macro that
   the text came from. */
void
input_push_text (char *text, char *macro)
{
  if (text)
    input_push (text, macro, 0, line_nr.line_nr);
}

/* Used in tests - like input_push_text, but the lines from the text have
   line numbers. */
void
input_push_text_with_line_nos (char *text, int starting)
{
  input_push (text, 0, 0, starting);
  input_stack[input_number - 1].type = IN_text;
}

void
input_reset_input_stack (void)
{
  input_number = 0;
  /* TODO: free the memory */
}

int
top_file_index (void)
{
  int i = input_number - 1;
  while (i >= 0 && input_stack[i].type != IN_file)
    i--;
  return i;
}

void
set_input_encoding (char *encoding)
{
  int i;

  /* Set encoding of top file in stack. */
  i = top_file_index ();
  if (i >= 0)
    input_stack[i].input_encoding = encoding;
}


static char **include_dirs;
static size_t include_dirs_number;
static size_t include_dirs_space;

void
add_include_directory (char *filename)
{
  int len;
  if (include_dirs_number == include_dirs_space)
    {
      include_dirs = realloc (include_dirs,
                              sizeof (char *) * (include_dirs_space += 5));
    }
  filename = strdup (filename);
  include_dirs[include_dirs_number++] = filename;
  len = strlen (filename);
  if (len > 0 && filename[len - 1] == '/')
    filename[len - 1] = '\0';
}

char *
locate_include_file (char *filename)
{
  char *fullpath;
  struct stat dummy;
  int i, status;

  /* Checks if filename is absolute or relative to current directory.
     TODO: Could use macros in top-level config.h for this. */
  /* TODO: The Perl code (in Common.pm, 'locate_include_file') handles 
     a volume in a path (like "A:"), possibly more general treatment 
     with File::Spec module. */
  if (!memcmp (filename, "/", 1)
      || !memcmp (filename, "../", 3)
      || !memcmp (filename, "./", 2))
    {
      status = stat (filename, &dummy);
      if (status == 0)
        return filename;
    }
  else
    {
      for (i = 0; i < include_dirs_number; i++)
        {
          asprintf (&fullpath, "%s/%s", include_dirs[i], filename);
          status = stat (fullpath, &dummy);
          if (status == 0)
            return fullpath;
          free (fullpath);
        }
    }
  return 0;
}

/* Try to open a file called FILENAME, looking for it in the list of include
   directories. */
int
input_push_file (char *filename)
{
  FILE *stream;

  stream = fopen (filename, "r");
  if (!stream)
    return errno;

  if (input_number == input_space)
    {
      input_stack = realloc (input_stack, (input_space += 5) * sizeof (INPUT));
      if (!input_stack)
        abort ();
    }

  /* Strip off a leading directory path. */
  char *p, *q;
  p = 0;
  q = strchr (filename, '/');
  while (q)
    {
      p = q;
      q = strchr (q + 1, '/');
    }
  if (p)
    filename = strdup (p+1);

  input_stack[input_number].type = IN_file;
  input_stack[input_number].file = stream;
  input_stack[input_number].line_nr.file_name = filename;
  input_stack[input_number].line_nr.line_nr = 0;
  input_stack[input_number].line_nr.macro = 0;
  input_stack[input_number].text = 0;
  input_stack[input_number].ptext = 0;
  input_stack[input_number].input_encoding = 0;
  input_number++;

  return 0;
}