/* Iterate over previous character in UTF-8 string.
   Copyright (C) 2002, 2006-2007, 2009-2014 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2002.

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU Lesser General Public License as published
   by the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>

/* Specification.  */
#include "unistr.h"

  /* Step backwards from S to the UTF-8 character that ends immediately
     before S, without reading any byte located before START.

     The byte s[-1] is taken as the last unit of the character; a lead
     byte is then looked for at s[-2], s[-3] and s[-4] in turn, so at most
     four bytes are examined.

     On success the decoded code point is stored in *PUC and a pointer to
     the first byte of that character is returned.  NULL is returned, and
     *PUC is left untouched, when S == START or when no lead byte of the
     matching length is found among the bytes available.

     When CONFIG_UNICODE_SAFETY is non-zero, trail bytes must additionally
     lie in 0x80..0xBF, and overlong 3- and 4-byte forms, the surrogate
     range U+D800..U+DFFF and values above U+10FFFF are rejected.  When it
     is zero or undefined those checks are compiled out and only the lead
     byte of each candidate length is inspected.  */
  const uint8_t *
  u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
  {
    /* Keep in sync with unistr.h and u8-mbtouc-aux.c.  */
    if (s != start)
      {
        uint8_t c_1 = s[-1];

        /* One byte: plain ASCII.  */
        if (c_1 < 0x80)
          {
            *puc = c_1;
            return s - 1;
          }
  #if CONFIG_UNICODE_SAFETY
        /* Each conditionally compiled `if' below is unbraced on purpose:
           it guards exactly the one `if' statement after its #endif.
           Here: c_1 must be a trail byte (0x80..0xBF).  */
        if ((c_1 ^ 0x80) < 0x40)
  #endif
          if (s - 1 != start)
            {
              uint8_t c_2 = s[-2];

              /* Two bytes: lead 0xC2..0xDF.  0xC0 and 0xC1 are excluded
                 because they could only start an overlong encoding.  */
              if (c_2 >= 0xc2 && c_2 < 0xe0)
                {
                  *puc = ((unsigned int) (c_2 & 0x1f) << 6)
                         | (unsigned int) (c_1 ^ 0x80);
                  return s - 2;
                }
  #if CONFIG_UNICODE_SAFETY
              /* c_2 must be a trail byte before looking further back.  */
              if ((c_2 ^ 0x80) < 0x40)
  #endif
                if (s - 2 != start)
                  {
                    uint8_t c_3 = s[-3];

                    /* Three bytes: lead 0xE0..0xEF.  */
                    if (c_3 >= 0xe0 && c_3 < 0xf0
  #if CONFIG_UNICODE_SAFETY
                        /* 0xE0 followed by 0x80..0x9F would be overlong.  */
                        && (c_3 >= 0xe1 || c_2 >= 0xa0)
                        /* 0xED followed by 0xA0..0xBF would be a surrogate.  */
                        && (c_3 != 0xed || c_2 < 0xa0)
  #endif
                       )
                      {
                        *puc = ((unsigned int) (c_3 & 0x0f) << 12)
                               | ((unsigned int) (c_2 ^ 0x80) << 6)
                               | (unsigned int) (c_1 ^ 0x80);
                        return s - 3;
                      }
  #if CONFIG_UNICODE_SAFETY
                    /* c_3 must be a trail byte before looking further back.  */
                    if ((c_3 ^ 0x80) < 0x40)
  #endif
                      if (s - 3 != start)
                        {
                          uint8_t c_4 = s[-4];

                          /* Four bytes: lead 0xF0..0xF7.  */
                          if (c_4 >= 0xf0 && c_4 < 0xf8
  #if CONFIG_UNICODE_SAFETY
                              /* 0xF0 followed by 0x80..0x8F would be
                                 overlong.  */
                              && (c_4 >= 0xf1 || c_3 >= 0x90)
                              /* Anything past 0xF4 0x8F .. .. would exceed
                                 U+10FFFF.  */
                              && (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90))
  #endif
                             )
                            {
                              *puc = ((unsigned int) (c_4 & 0x07) << 18)
                                     | ((unsigned int) (c_3 ^ 0x80) << 12)
                                     | ((unsigned int) (c_2 ^ 0x80) << 6)
                                     | (unsigned int) (c_1 ^ 0x80);
                              return s - 4;
                            }
                        }
                  }
            }
      }
    /* Empty range, or no acceptable character ends at S.  */
    return NULL;
  }