/*
 * internal.h - internal declarations for the charset library.
 */
  4. #ifndef charset_internal_h
  5. #define charset_internal_h
  6. /* This invariably comes in handy */
  7. #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
  8. /* This is an invalid Unicode value used to indicate an error. */
  9. #define ERROR 0xFFFFL /* Unicode value representing error */
  10. typedef struct charset_spec charset_spec;
  11. typedef struct sbcs_data sbcs_data;
  12. struct charset_spec {
  13. int charset; /* numeric identifier */
  14. /*
  15. * A function to read the character set and output Unicode
  16. * characters. The `emit' function expects to get Unicode chars
  17. * passed to it; it should be sent ERROR for any encoding error
  18. * on the input.
  19. */
  20. void (*read)(charset_spec const *charset, long int input_chr,
  21. charset_state *state,
  22. void (*emit)(void *ctx, long int output), void *emitctx);
  23. /*
  24. * A function to read Unicode characters and output in this
  25. * character set. The `emit' function expects to get byte
  26. * values passed to it; it should be sent ERROR for any
  27. * non-representable characters on the input.
  28. */
  29. void (*write)(charset_spec const *charset, long int input_chr,
  30. charset_state *state,
  31. void (*emit)(void *ctx, long int output), void *emitctx);
  32. void const *data;
  33. };
  34. /*
  35. * This is the format of `data' used by the SBCS read and write
  36. * functions; so it's the format used in all SBCS definitions.
  37. */
  38. struct sbcs_data {
  39. /*
  40. * This is a simple mapping table converting each SBCS position
  41. * to a Unicode code point. Some positions may contain ERROR,
  42. * indicating that that byte value is not defined in the SBCS
  43. * in question and its occurrence in input is an error.
  44. */
  45. unsigned long sbcs2ucs[256];
  46. /*
  47. * This lookup table is used to convert Unicode back to the
  48. * SBCS. It consists of the valid byte values in the SBCS,
  49. * sorted in order of their Unicode translation. So given a
  50. * Unicode value U, you can do a binary search on this table
  51. * using the above table as a lookup: when testing the Xth
  52. * position in this table, you branch according to whether
  53. * sbcs2ucs[ucs2sbcs[X]] is less than, greater than, or equal
  54. * to U.
  55. *
  56. * Note that since there may be fewer than 256 valid byte
  57. * values in a particular SBCS, we must supply the length of
  58. * this table as well as the contents.
  59. */
  60. unsigned char ucs2sbcs[256];
  61. int nvalid;
  62. };
  63. /*
  64. * Prototypes for internal library functions.
  65. */
  66. charset_spec const *charset_find_spec(int charset);
  67. void read_sbcs(charset_spec const *charset, long int input_chr,
  68. charset_state *state,
  69. void (*emit)(void *ctx, long int output), void *emitctx);
  70. void write_sbcs(charset_spec const *charset, long int input_chr,
  71. charset_state *state,
  72. void (*emit)(void *ctx, long int output), void *emitctx);
  73. /*
  74. * Placate compiler warning about unused parameters, of which we
  75. * expect to have some in this library.
  76. */
  77. #define UNUSEDARG(x) ( (x) = (x) )
  78. #endif /* charset_internal_h */