mbrtowc.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  /* Convert multibyte character to wide character.
     Copyright (C) 1999-2002, 2005-2022 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2008.

     This file is free software: you can redistribute it and/or modify
     it under the terms of the GNU Lesser General Public License as
     published by the Free Software Foundation; either version 2.1 of the
     License, or (at your option) any later version.

     This file is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU Lesser General Public License for more details.

     You should have received a copy of the GNU Lesser General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  14. #include <config.h>
  15. /* Specification. */
  16. #include <wchar.h>
  17. #if GNULIB_defined_mbstate_t
  18. /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
  19. and directly for the UTF-8 locales. */
  20. # include <errno.h>
  21. # include <stdint.h>
  22. # include <stdlib.h>
  23. # if defined _WIN32 && !defined __CYGWIN__
  24. # define WIN32_LEAN_AND_MEAN /* avoid including junk */
  25. # include <windows.h>
  26. # elif HAVE_PTHREAD_API
  27. # include <pthread.h>
  28. # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
  29. # include <threads.h>
  30. # pragma weak thrd_exit
  31. # define c11_threads_in_use() (thrd_exit != NULL)
  32. # else
  33. # define c11_threads_in_use() 0
  34. # endif
  35. # elif HAVE_THREADS_H
  36. # include <threads.h>
  37. # endif
  38. # include "attribute.h"
  39. # include "verify.h"
  40. # include "lc-charset-dispatch.h"
  41. # include "mbtowc-lock.h"
  42. verify (sizeof (mbstate_t) >= 4);
  43. static char internal_state[4];
  44. size_t
  45. mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  46. {
  47. # define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX)
  48. # include "mbrtowc-impl.h"
  49. }
  50. #else
  51. /* Override the system's mbrtowc() function. */
  52. # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  53. # include "hard-locale.h"
  54. # include <locale.h>
  55. # endif
  56. # undef mbrtowc
  /* Replacement for the system's mbrtowc() that works around the platform
     defects selected by the MBRTOWC_* configuration macros.  Inside this
     function the name mbrtowc refers to the system function, because the
     replacement macro has been #undef'd above.

     PWC  where to store the converted wide character; may be NULL, in
          which case the result is written to a local scratch variable,
     S    the multibyte input; may be NULL where a workaround below
          handles it,
     N    the number of bytes available at S,
     PS   the conversion state; may be NULL.

     Returns what the system mbrtowc() returns, adjusted by whichever of
     the workarounds below are compiled in.  By the mbrtowc() contract
     that is the number of bytes consumed, 0 for the null wide character,
     (size_t) -2 for an incomplete sequence, or (size_t) -1 for an
     invalid one.  */
  size_t
  rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  {
    size_t ret;
    wchar_t wc;

  # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
    /* Rewrite a call with a null S into the equivalent call
       mbrtowc (NULL, "", 1, ps), so that no null pointer reaches the
       system function.  */
    if (s == NULL)
      {
        pwc = NULL;
        s = "";
        n = 1;
      }
  # endif

  # if MBRTOWC_EMPTY_INPUT_BUG
    /* With no input bytes the sequence is necessarily incomplete.  */
    if (n == 0)
      return (size_t) -2;
  # endif

    /* From here on PWC is never null, so every later store through it is
       safe.  */
    if (! pwc)
      pwc = &wc;

  # if MBRTOWC_RETVAL_BUG
    {
      static mbstate_t internal_state;

      /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
         hidden internal state, but we can call it on our variable.  */
      if (ps == NULL)
        ps = &internal_state;

      if (!mbsinit (ps))
        {
          /* Parse the rest of the multibyte character byte for byte,
             counting the bytes consumed ourselves instead of relying on
             the system's return value.  */
          size_t count = 0;
          for (; n > 0; s++, n--)
            {
              ret = mbrtowc (&wc, s, 1, ps);

              if (ret == (size_t)(-1))
                return (size_t)(-1);
              count++;
              if (ret != (size_t)(-2))
                {
                  /* The multibyte character has been completed.  */
                  *pwc = wc;
                  return (wc == 0 ? 0 : count);
                }
            }
          /* Input exhausted while the character is still incomplete.  */
          return (size_t)(-2);
        }
    }
  # endif

  # if MBRTOWC_STORES_INCOMPLETE_BUG
    /* Convert into the local scratch variable and copy the result out only
       when a character was actually produced, so that *PWC is left alone
       on error or incomplete input.  PWC cannot be null at this point, so
       no null check is needed before the store.  */
    ret = mbrtowc (&wc, s, n, ps);
    if (ret < (size_t) -2)
      *pwc = wc;
  # else
    ret = mbrtowc (pwc, s, n, ps);
  # endif

  # if MBRTOWC_NUL_RETVAL_BUG
    /* A successfully converted null wide character must yield 0.  */
    if (ret < (size_t) -2 && !*pwc)
      return 0;
  # endif

  # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
    /* When the conversion failed or was incomplete, input is available,
       and hard_locale (LC_CTYPE) reports that the locale is not a hard
       one, treat the first byte as a character of its own.  */
    if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
      {
        unsigned char uc = *s;
        *pwc = uc;
        return 1;
      }
  # endif

    return ret;
  }
  125. #endif