localcharset.c 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. /* Determine a canonical name for the current locale's character encoding.
  2. Copyright (C) 2000-2001 Free Software Foundation, Inc.
  3. This program is free software; you can redistribute it and/or modify it
  4. under the terms of the GNU Library General Public License as published
  5. by the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Library General Public License for more details.
  11. You should have received a copy of the GNU Library General Public
  12. License along with this program; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
  14. USA. */
  15. /* Written by Bruno Haible <haible@clisp.cons.org>. */
  16. #ifdef HAVE_CONFIG_H
  17. # include <config.h>
  18. #endif
  19. #if HAVE_STDDEF_H
  20. # include <stddef.h>
  21. #endif
  22. #include <stdio.h>
  23. #if HAVE_STRING_H
  24. # include <string.h>
  25. #else
  26. # include <strings.h>
  27. #endif
  28. #if HAVE_STDLIB_H
  29. # include <stdlib.h>
  30. #endif
  31. #if defined _WIN32 || defined __WIN32__
  32. # undef WIN32 /* avoid warning on mingw32 */
  33. # define WIN32
  34. #endif
  35. #ifndef WIN32
  36. # if HAVE_LANGINFO_CODESET
  37. # include <langinfo.h>
  38. # else
  39. # if HAVE_SETLOCALE
  40. # include <locale.h>
  41. # endif
  42. # endif
  43. #else /* WIN32 */
  44. # define WIN32_LEAN_AND_MEAN
  45. # include <windows.h>
  46. #endif
  47. #ifndef DIRECTORY_SEPARATOR
  48. # define DIRECTORY_SEPARATOR '/'
  49. #endif
  50. #ifndef ISSLASH
  51. # define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
  52. #endif
  53. /* The following static variable is declared 'volatile' to avoid a
  54. possible multithread problem in the function get_charset_aliases. If we
  55. are running in a threaded environment, and if two threads initialize
  56. 'charset_aliases' simultaneously, both will produce the same value,
  57. and everything will be ok if the two assignments to 'charset_aliases'
  58. are atomic. But I don't know what will happen if the two assignments mix. */
  59. #if __STDC__ != 1
  60. # define volatile /* empty */
  61. #endif
  62. /* Pointer to the contents of the charset.alias file, if it has already been
  63. read, else NULL. Its format is:
  64. ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */
  65. static const char * volatile charset_aliases;
  66. /* Return a pointer to the contents of the charset.alias file. */
  67. static const char *
  68. get_charset_aliases ()
  69. {
  70. const char *cp;
  71. cp = charset_aliases;
  72. if (cp == NULL)
  73. {
  74. #ifndef WIN32
  75. FILE *fp;
  76. const char *dir = LIBDIR;
  77. const char *base = "charset.alias";
  78. char *file_name;
  79. /* Concatenate dir and base into freshly allocated file_name. */
  80. {
  81. size_t dir_len = strlen (dir);
  82. size_t base_len = strlen (base);
  83. int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
  84. file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
  85. if (file_name != NULL)
  86. {
  87. memcpy (file_name, dir, dir_len);
  88. if (add_slash)
  89. file_name[dir_len] = DIRECTORY_SEPARATOR;
  90. memcpy (file_name + dir_len + add_slash, base, base_len + 1);
  91. }
  92. }
  93. if (file_name == NULL || (fp = fopen (file_name, "r")) == NULL)
  94. /* Out of memory or file not found, treat it as empty. */
  95. cp = "";
  96. else
  97. {
  98. /* Parse the file's contents. */
  99. int c;
  100. char buf1[50+1];
  101. char buf2[50+1];
  102. char *res_ptr = NULL;
  103. size_t res_size = 0;
  104. size_t l1, l2;
  105. for (;;)
  106. {
  107. c = getc (fp);
  108. if (c == EOF)
  109. break;
  110. if (c == '\n' || c == ' ' || c == '\t')
  111. continue;
  112. if (c == '#')
  113. {
  114. /* Skip comment, to end of line. */
  115. do
  116. c = getc (fp);
  117. while (!(c == EOF || c == '\n'));
  118. if (c == EOF)
  119. break;
  120. continue;
  121. }
  122. ungetc (c, fp);
  123. if (fscanf(fp, "%50s %50s", buf1, buf2) < 2)
  124. break;
  125. l1 = strlen (buf1);
  126. l2 = strlen (buf2);
  127. if (res_size == 0)
  128. {
  129. res_size = l1 + 1 + l2 + 1;
  130. res_ptr = malloc (res_size + 1);
  131. }
  132. else
  133. {
  134. res_size += l1 + 1 + l2 + 1;
  135. res_ptr = realloc (res_ptr, res_size + 1);
  136. }
  137. if (res_ptr == NULL)
  138. {
  139. /* Out of memory. */
  140. res_size = 0;
  141. break;
  142. }
  143. strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
  144. strcpy (res_ptr + res_size - (l2 + 1), buf2);
  145. }
  146. fclose (fp);
  147. if (res_size == 0)
  148. cp = "";
  149. else
  150. {
  151. *(res_ptr + res_size) = '\0';
  152. cp = res_ptr;
  153. }
  154. }
  155. if (file_name != NULL)
  156. free (file_name);
  157. #else /* WIN32 */
  158. /* To avoid the troubles of installing a separate file in the same
  159. directory as the DLL and of retrieving the DLL's directory at
  160. runtime, simply inline the aliases here. */
  161. cp = "CP936" "\0" "GBK" "\0"
  162. "CP1361" "\0" "JOHAB" "\0";
  163. #endif
  164. charset_aliases = cp;
  165. }
  166. return cp;
  167. }
  /* Return a name for the character encoding of the current locale.

     The raw name comes from nl_langinfo (CODESET) where available, from the
     LC_ALL / LC_CTYPE / LANG environment variables otherwise, or from the
     active code page number on WIN32.  It is then looked up in the table
     returned by get_charset_aliases (); the first entry whose alias equals
     the raw name, or whose alias is "*", supplies the result.  If no entry
     matches, the raw name itself is returned, which may be non-canonical
     (and is "" when no name could be obtained at all).

     The result must not be freed by the caller.  */
  #ifdef STATIC
  STATIC
  #endif
  const char *
  locale_charset ()
  {
    const char *name;
    const char *entry;

  #ifndef WIN32

  # if HAVE_LANGINFO_CODESET

    /* Most systems support nl_langinfo (CODESET) nowadays.  */
    name = nl_langinfo (CODESET);

  # else

    /* On old systems which lack it, use setlocale or getenv.  */
    const char *env_locale = NULL;

    /* But most old systems don't have a complete set of locales.  Some
       (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
       use setlocale here; it would return "C" when it doesn't support the
       locale name the user has set.  */
  # if HAVE_SETLOCALE && 0
    env_locale = setlocale (LC_CTYPE, NULL);
  # endif

    /* Fall through the usual precedence chain, stopping at the first
       variable that is set to a non-empty value.  */
    if (env_locale == NULL || env_locale[0] == '\0')
      env_locale = getenv ("LC_ALL");
    if (env_locale == NULL || env_locale[0] == '\0')
      env_locale = getenv ("LC_CTYPE");
    if (env_locale == NULL || env_locale[0] == '\0')
      env_locale = getenv ("LANG");

    /* On some old systems, one used to set locale = "iso8859_1".  On others,
       you set it to "language_COUNTRY.charset".  In any case, we resolve it
       through the charset.alias file.  */
    name = env_locale;

  # endif

  #else /* WIN32 */

    static char cp_name[2 + 10 + 1];

    /* Win32 has a function returning the locale's codepage as a number.  */
    sprintf (cp_name, "CP%u", GetACP ());
    name = cp_name;

  #endif

    /* The canonical name cannot be determined: fall back to "".  */
    if (name == NULL)
      name = "";

    /* Resolve alias.  The table is a list of alias/canonical string pairs
       terminated by an empty string.  */
    entry = get_charset_aliases ();
    while (*entry != '\0')
      {
        const char *canonical = entry + strlen (entry) + 1;

        if ((entry[0] == '*' && entry[1] == '\0')
            || strcmp (name, entry) == 0)
          {
            name = canonical;
            break;
          }
        /* Step over the canonical name to the next pair.  */
        entry = canonical + strlen (canonical) + 1;
      }

    return name;
  }