mbrtowc.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  /* Convert multibyte character to wide character.
     Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2008.

     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU Lesser General Public License as published by
     the Free Software Foundation; either version 3 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU Lesser General Public License for more details.

     You should have received a copy of the GNU Lesser General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  14. #include <config.h>
  15. /* Specification. */
  16. #include <wchar.h>
  17. #if GNULIB_defined_mbstate_t
  18. /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
  19. and directly for the UTF-8 locales. */
  20. # include <errno.h>
  21. # include <stdint.h>
  22. # include <stdlib.h>
  23. # if defined _WIN32 && !defined __CYGWIN__
  24. # define WIN32_LEAN_AND_MEAN /* avoid including junk */
  25. # include <windows.h>
  26. # elif HAVE_PTHREAD_API
  27. # include <pthread.h>
  28. # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
  29. # include <threads.h>
  30. # pragma weak thrd_exit
  31. # define c11_threads_in_use() (thrd_exit != NULL)
  32. # else
  33. # define c11_threads_in_use() 0
  34. # endif
  35. # elif HAVE_THREADS_H
  36. # include <threads.h>
  37. # endif
  38. # include "attribute.h"
  39. # include "verify.h"
  40. # include "lc-charset-dispatch.h"
  41. # include "mbtowc-lock.h"
  42. verify (sizeof (mbstate_t) >= 4);
  43. static char internal_state[4];
  44. size_t
  45. mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  46. {
  47. # define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX)
  48. # include "mbrtowc-impl.h"
  49. }
  50. #else
  51. /* Override the system's mbrtowc() function. */
  52. # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  53. # include "hard-locale.h"
  54. # include <locale.h>
  55. # endif
  56. # undef mbrtowc
  /* Replacement for the system's mbrtowc() that layers workarounds, selected
     at configure time through the MBRTOWC_* macros, around the system call.

     PWC  where to store the converted wide character; may be NULL, in which
          case the result is stored in a local scratch variable.
     S    the multibyte input; NULL is handled like the call
          mbrtowc (NULL, "", 1, PS).
     N    maximum number of bytes of S to examine.
     PS   conversion state, passed through to the system mbrtowc(); under
          MBRTOWC_RETVAL_BUG a NULL PS is replaced by a static state owned
          by this function.

     Returns the value produced by the system mbrtowc(), except where a
     workaround overrides it:
       - MBRTOWC_EMPTY_INPUT_BUG: N == 0 yields (size_t) -2 without calling
         the system function.
       - MBRTOWC_RETVAL_BUG: when PS is not in the initial state, the input
         is fed one byte at a time and the number of bytes consumed by this
         call (or 0 for a null wide character) is returned.
       - MBRTOWC_NUL_RETVAL_BUG: 0 is returned whenever the stored wide
         character is null.
       - MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ: if the system call reports
         (size_t) -1 or (size_t) -2, N != 0 and hard_locale (LC_CTYPE) is
         false, the first byte of S is stored as the wide character and 1 is
         returned.  */
  size_t
  rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  {
    size_t ret;
    wchar_t wc;

    /* ISO C defines mbrtowc (pwc, NULL, n, ps) as mbrtowc (NULL, "", 1, ps).
       Normalize this form unconditionally, so that no code below -- in
       particular the C-locale fallback, which reads *s -- ever sees a null
       S, regardless of which workarounds are compiled in.  */
    if (s == NULL)
      {
        pwc = NULL;
        s = "";
        n = 1;
      }

  # if MBRTOWC_EMPTY_INPUT_BUG
    /* Report empty input as an incomplete character ourselves instead of
       asking the system function.  */
    if (n == 0)
      return (size_t) -2;
  # endif

    /* From here on PWC always points to valid storage.  */
    if (pwc == NULL)
      pwc = &wc;

  # if MBRTOWC_RETVAL_BUG
    {
      static mbstate_t internal_state;

      /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
         hidden internal state, but we can call it on our variable.  */
      if (ps == NULL)
        ps = &internal_state;

      if (!mbsinit (ps))
        {
          /* Parse the rest of the multibyte character byte for byte, so that
             the number of bytes consumed by this call can be counted here.  */
          size_t count = 0;

          for (; n > 0; s++, n--)
            {
              ret = mbrtowc (&wc, s, 1, ps);

              if (ret == (size_t) -1)
                return (size_t) -1;
              count++;
              if (ret != (size_t) -2)
                {
                  /* The multibyte character has been completed.  */
                  *pwc = wc;
                  return (wc == 0 ? 0 : count);
                }
            }
          /* All N bytes consumed and the character is still incomplete.  */
          return (size_t) -2;
        }
    }
  # endif

  # if MBRTOWC_STORES_INCOMPLETE_BUG
    /* Convert into the scratch variable and copy it out only on success, so
       that *PWC is left untouched for invalid or incomplete input.  */
    ret = mbrtowc (&wc, s, n, ps);
    if (ret < (size_t) -2)
      *pwc = wc;
  # else
    ret = mbrtowc (pwc, s, n, ps);
  # endif

  # if MBRTOWC_NUL_RETVAL_BUG
    /* A successfully converted null wide character must yield 0.  */
    if (ret < (size_t) -2 && !*pwc)
      return 0;
  # endif

  # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
    /* The system call failed, but hard_locale() reports a non-hard locale:
       hand back the first input byte as a one-byte character instead.  */
    if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
      {
        unsigned char uc = *s;
        *pwc = uc;
        return 1;
      }
  # endif

    return ret;
  }
  125. #endif