mbrtowc-impl-utf8.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /* Convert multibyte character to wide character.
  2. Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc.
  3. This program is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Lesser General Public License as published by
  5. the Free Software Foundation; either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  13. /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
  14. /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
  15. that handles the special case of the UTF-8 encoding. */
  16. /* Cf. unistr/u8-mbtouc.c. */
  17. unsigned char c = (unsigned char) p[0];
  18. if (c < 0x80)
  19. {
  20. if (pwc != NULL)
  21. *pwc = c;
  22. res = (c == 0 ? 0 : 1);
  23. goto success;
  24. }
  25. if (c >= 0xc2)
  26. {
  27. if (c < 0xe0)
  28. {
  29. if (m == 1)
  30. goto incomplete;
  31. else /* m >= 2 */
  32. {
  33. unsigned char c2 = (unsigned char) p[1];
  34. if ((c2 ^ 0x80) < 0x40)
  35. {
  36. if (pwc != NULL)
  37. *pwc = ((unsigned int) (c & 0x1f) << 6)
  38. | (unsigned int) (c2 ^ 0x80);
  39. res = 2;
  40. goto success;
  41. }
  42. }
  43. }
  44. else if (c < 0xf0)
  45. {
  46. if (m == 1)
  47. goto incomplete;
  48. else
  49. {
  50. unsigned char c2 = (unsigned char) p[1];
  51. if ((c2 ^ 0x80) < 0x40
  52. && (c >= 0xe1 || c2 >= 0xa0)
  53. && (c != 0xed || c2 < 0xa0))
  54. {
  55. if (m == 2)
  56. goto incomplete;
  57. else /* m >= 3 */
  58. {
  59. unsigned char c3 = (unsigned char) p[2];
  60. if ((c3 ^ 0x80) < 0x40)
  61. {
  62. unsigned int wc =
  63. (((unsigned int) (c & 0x0f) << 12)
  64. | ((unsigned int) (c2 ^ 0x80) << 6)
  65. | (unsigned int) (c3 ^ 0x80));
  66. if (FITS_IN_CHAR_TYPE (wc))
  67. {
  68. if (pwc != NULL)
  69. *pwc = wc;
  70. res = 3;
  71. goto success;
  72. }
  73. }
  74. }
  75. }
  76. }
  77. }
  78. else if (c <= 0xf4)
  79. {
  80. if (m == 1)
  81. goto incomplete;
  82. else
  83. {
  84. unsigned char c2 = (unsigned char) p[1];
  85. if ((c2 ^ 0x80) < 0x40
  86. && (c >= 0xf1 || c2 >= 0x90)
  87. && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90)))
  88. {
  89. if (m == 2)
  90. goto incomplete;
  91. else
  92. {
  93. unsigned char c3 = (unsigned char) p[2];
  94. if ((c3 ^ 0x80) < 0x40)
  95. {
  96. if (m == 3)
  97. goto incomplete;
  98. else /* m >= 4 */
  99. {
  100. unsigned char c4 = (unsigned char) p[3];
  101. if ((c4 ^ 0x80) < 0x40)
  102. {
  103. unsigned int wc =
  104. (((unsigned int) (c & 0x07) << 18)
  105. | ((unsigned int) (c2 ^ 0x80) << 12)
  106. | ((unsigned int) (c3 ^ 0x80) << 6)
  107. | (unsigned int) (c4 ^ 0x80));
  108. if (FITS_IN_CHAR_TYPE (wc))
  109. {
  110. if (pwc != NULL)
  111. *pwc = wc;
  112. res = 4;
  113. goto success;
  114. }
  115. }
  116. }
  117. }
  118. }
  119. }
  120. }
  121. }
  122. }
  123. goto invalid;