unistr.in.h 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. /* Elementary Unicode string functions.
  2. Copyright (C) 2001-2002, 2005-2012 Free Software Foundation, Inc.
  3. This program is free software: you can redistribute it and/or modify it
  4. under the terms of the GNU Lesser General Public License as published
  5. by the Free Software Foundation; either version 3 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  10. Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <http://www.gnu.org/licenses/>. */
  13. #ifndef _UNISTR_H
  14. #define _UNISTR_H
  15. #include "unitypes.h"
  16. /* Get common macros for C. */
  17. #include "unused-parameter.h"
  18. /* Get bool. */
  19. #include <stdbool.h>
  20. /* Get size_t. */
  21. #include <stddef.h>
  22. #ifdef __cplusplus
  23. extern "C" {
  24. #endif
/* Conventions:

   All functions prefixed with u8_ operate on UTF-8 encoded strings.
   Their unit is a uint8_t (1 byte).

   All functions prefixed with u16_ operate on UTF-16 encoded strings.
   Their unit is a uint16_t (a 2-byte word).

   All functions prefixed with u32_ operate on UCS-4 encoded strings.
   Their unit is a uint32_t (a 4-byte word).

   All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
   n units.

   All arguments starting with "str" and the arguments of functions starting
   with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
   which terminates at the first NUL unit.  This termination unit is
   considered part of the string for all memory allocation purposes, but
   is not considered part of the string for all other logical purposes.

   Functions returning a string result take a (resultbuf, lengthp) argument
   pair.  If resultbuf is not NULL and the result fits into *lengthp units,
   it is put in resultbuf, and resultbuf is returned.  Otherwise, a freshly
   allocated string is returned.  In both cases, *lengthp is set to the
   length (number of units) of the returned string.  In case of error,
   NULL is returned and errno is set.  */
  45. /* Elementary string checks. */
/* Check whether a UTF-8 string is well-formed.
   Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
  48. extern const uint8_t *
  49. u8_check (const uint8_t *s, size_t n);
/* Check whether a UTF-16 string is well-formed.
   Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
  52. extern const uint16_t *
  53. u16_check (const uint16_t *s, size_t n);
/* Check whether a UCS-4 string is well-formed.
   Return NULL if valid, or a pointer to the first invalid unit otherwise.  */
  56. extern const uint32_t *
  57. u32_check (const uint32_t *s, size_t n);
  58. /* Elementary string conversions. */
/* Convert a UTF-8 string to a UTF-16 string.  */
  60. extern uint16_t *
  61. u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
  62. size_t *lengthp);
/* Convert a UTF-8 string to a UCS-4 string.  */
  64. extern uint32_t *
  65. u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
  66. size_t *lengthp);
/* Convert a UTF-16 string to a UTF-8 string.  */
  68. extern uint8_t *
  69. u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
  70. size_t *lengthp);
/* Convert a UTF-16 string to a UCS-4 string.  */
  72. extern uint32_t *
  73. u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
  74. size_t *lengthp);
/* Convert a UCS-4 string to a UTF-8 string.  */
  76. extern uint8_t *
  77. u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
  78. size_t *lengthp);
/* Convert a UCS-4 string to a UTF-16 string.  */
  80. extern uint16_t *
  81. u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
  82. size_t *lengthp);
  83. /* Elementary string functions. */
  84. /* Return the length (number of units) of the first character in S, which is
  85. no longer than N. Return 0 if it is the NUL character. Return -1 upon
  86. failure. */
  87. /* Similar to mblen(), except that s must not be NULL. */
  88. extern int
  89. u8_mblen (const uint8_t *s, size_t n);
  90. extern int
  91. u16_mblen (const uint16_t *s, size_t n);
  92. extern int
  93. u32_mblen (const uint32_t *s, size_t n);
  94. /* Return the length (number of units) of the first character in S, putting
  95. its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
  96. and an appropriate number of units is returned.
  97. The number of available units, N, must be > 0. */
  98. /* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
  99. and the NUL character is not treated specially. */
/* The variants without the _unsafe suffix are safe, even if the library is
   compiled without --enable-safety.  */
  102. #if GNULIB_UNISTR_U8_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  103. # if !HAVE_INLINE
  104. extern int
  105. u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
  106. # else
  107. extern int
  108. u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
  109. static inline int
  110. u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
  111. {
  112. uint8_t c = *s;
  113. if (c < 0x80)
  114. {
  115. *puc = c;
  116. return 1;
  117. }
  118. else
  119. return u8_mbtouc_unsafe_aux (puc, s, n);
  120. }
  121. # endif
  122. #endif
  123. #if GNULIB_UNISTR_U16_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  124. # if !HAVE_INLINE
  125. extern int
  126. u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
  127. # else
  128. extern int
  129. u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
  130. static inline int
  131. u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
  132. {
  133. uint16_t c = *s;
  134. if (c < 0xd800 || c >= 0xe000)
  135. {
  136. *puc = c;
  137. return 1;
  138. }
  139. else
  140. return u16_mbtouc_unsafe_aux (puc, s, n);
  141. }
  142. # endif
  143. #endif
  144. #if GNULIB_UNISTR_U32_MBTOUC_UNSAFE || HAVE_LIBUNISTRING
  145. # if !HAVE_INLINE
  146. extern int
  147. u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
  148. # else
  149. static inline int
  150. u32_mbtouc_unsafe (ucs4_t *puc,
  151. const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
  152. {
  153. uint32_t c = *s;
  154. # if CONFIG_UNICODE_SAFETY
  155. if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
  156. # endif
  157. *puc = c;
  158. # if CONFIG_UNICODE_SAFETY
  159. else
  160. /* invalid multibyte character */
  161. *puc = 0xfffd;
  162. # endif
  163. return 1;
  164. }
  165. # endif
  166. #endif
  167. #if GNULIB_UNISTR_U8_MBTOUC || HAVE_LIBUNISTRING
  168. # if !HAVE_INLINE
  169. extern int
  170. u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
  171. # else
  172. extern int
  173. u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
  174. static inline int
  175. u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
  176. {
  177. uint8_t c = *s;
  178. if (c < 0x80)
  179. {
  180. *puc = c;
  181. return 1;
  182. }
  183. else
  184. return u8_mbtouc_aux (puc, s, n);
  185. }
  186. # endif
  187. #endif
  188. #if GNULIB_UNISTR_U16_MBTOUC || HAVE_LIBUNISTRING
  189. # if !HAVE_INLINE
  190. extern int
  191. u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
  192. # else
  193. extern int
  194. u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
  195. static inline int
  196. u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
  197. {
  198. uint16_t c = *s;
  199. if (c < 0xd800 || c >= 0xe000)
  200. {
  201. *puc = c;
  202. return 1;
  203. }
  204. else
  205. return u16_mbtouc_aux (puc, s, n);
  206. }
  207. # endif
  208. #endif
  209. #if GNULIB_UNISTR_U32_MBTOUC || HAVE_LIBUNISTRING
  210. # if !HAVE_INLINE
  211. extern int
  212. u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
  213. # else
  214. static inline int
  215. u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _GL_UNUSED_PARAMETER)
  216. {
  217. uint32_t c = *s;
  218. if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
  219. *puc = c;
  220. else
  221. /* invalid multibyte character */
  222. *puc = 0xfffd;
  223. return 1;
  224. }
  225. # endif
  226. #endif
  227. /* Return the length (number of units) of the first character in S, putting
  228. its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
  229. and -1 is returned for an invalid sequence of units, -2 is returned for an
  230. incomplete sequence of units.
  231. The number of available units, N, must be > 0. */
  232. /* Similar to u*_mbtouc(), except that the return value gives more details
  233. about the failure, similar to mbrtowc(). */
  234. #if GNULIB_UNISTR_U8_MBTOUCR || HAVE_LIBUNISTRING
  235. extern int
  236. u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
  237. #endif
  238. #if GNULIB_UNISTR_U16_MBTOUCR || HAVE_LIBUNISTRING
  239. extern int
  240. u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
  241. #endif
  242. #if GNULIB_UNISTR_U32_MBTOUCR || HAVE_LIBUNISTRING
  243. extern int
  244. u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
  245. #endif
  246. /* Put the multibyte character represented by UC in S, returning its
  247. length. Return -1 upon failure, -2 if the number of available units, N,
  248. is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
  249. /* Similar to wctomb(), except that s must not be NULL, and the argument n
  250. must be specified. */
  251. #if GNULIB_UNISTR_U8_UCTOMB || HAVE_LIBUNISTRING
  252. /* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
  253. extern int
  254. u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
  255. # if !HAVE_INLINE
  256. extern int
  257. u8_uctomb (uint8_t *s, ucs4_t uc, int n);
  258. # else
  259. static inline int
  260. u8_uctomb (uint8_t *s, ucs4_t uc, int n)
  261. {
  262. if (uc < 0x80 && n > 0)
  263. {
  264. s[0] = uc;
  265. return 1;
  266. }
  267. else
  268. return u8_uctomb_aux (s, uc, n);
  269. }
  270. # endif
  271. #endif
  272. #if GNULIB_UNISTR_U16_UCTOMB || HAVE_LIBUNISTRING
  273. /* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
  274. extern int
  275. u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
  276. # if !HAVE_INLINE
  277. extern int
  278. u16_uctomb (uint16_t *s, ucs4_t uc, int n);
  279. # else
  280. static inline int
  281. u16_uctomb (uint16_t *s, ucs4_t uc, int n)
  282. {
  283. if (uc < 0xd800 && n > 0)
  284. {
  285. s[0] = uc;
  286. return 1;
  287. }
  288. else
  289. return u16_uctomb_aux (s, uc, n);
  290. }
  291. # endif
  292. #endif
  293. #if GNULIB_UNISTR_U32_UCTOMB || HAVE_LIBUNISTRING
  294. # if !HAVE_INLINE
  295. extern int
  296. u32_uctomb (uint32_t *s, ucs4_t uc, int n);
  297. # else
  298. static inline int
  299. u32_uctomb (uint32_t *s, ucs4_t uc, int n)
  300. {
  301. if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
  302. {
  303. if (n > 0)
  304. {
  305. *s = uc;
  306. return 1;
  307. }
  308. else
  309. return -2;
  310. }
  311. else
  312. return -1;
  313. }
  314. # endif
  315. #endif
  316. /* Copy N units from SRC to DEST. */
  317. /* Similar to memcpy(). */
  318. extern uint8_t *
  319. u8_cpy (uint8_t *dest, const uint8_t *src, size_t n);
  320. extern uint16_t *
  321. u16_cpy (uint16_t *dest, const uint16_t *src, size_t n);
  322. extern uint32_t *
  323. u32_cpy (uint32_t *dest, const uint32_t *src, size_t n);
  324. /* Copy N units from SRC to DEST, guaranteeing correct behavior for
  325. overlapping memory areas. */
  326. /* Similar to memmove(). */
  327. extern uint8_t *
  328. u8_move (uint8_t *dest, const uint8_t *src, size_t n);
  329. extern uint16_t *
  330. u16_move (uint16_t *dest, const uint16_t *src, size_t n);
  331. extern uint32_t *
  332. u32_move (uint32_t *dest, const uint32_t *src, size_t n);
  333. /* Set the first N characters of S to UC. UC should be a character that
  334. occupies only 1 unit. */
  335. /* Similar to memset(). */
  336. extern uint8_t *
  337. u8_set (uint8_t *s, ucs4_t uc, size_t n);
  338. extern uint16_t *
  339. u16_set (uint16_t *s, ucs4_t uc, size_t n);
  340. extern uint32_t *
  341. u32_set (uint32_t *s, ucs4_t uc, size_t n);
  342. /* Compare S1 and S2, each of length N. */
  343. /* Similar to memcmp(). */
  344. extern int
  345. u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n);
  346. extern int
  347. u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n);
  348. extern int
  349. u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n);
  350. /* Compare S1 and S2. */
  351. /* Similar to the gnulib function memcmp2(). */
  352. extern int
  353. u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2);
  354. extern int
  355. u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2);
  356. extern int
  357. u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2);
  358. /* Search the string at S for UC. */
  359. /* Similar to memchr(). */
  360. extern uint8_t *
  361. u8_chr (const uint8_t *s, size_t n, ucs4_t uc);
  362. extern uint16_t *
  363. u16_chr (const uint16_t *s, size_t n, ucs4_t uc);
  364. extern uint32_t *
  365. u32_chr (const uint32_t *s, size_t n, ucs4_t uc);
  366. /* Count the number of Unicode characters in the N units from S. */
  367. /* Similar to mbsnlen(). */
  368. extern size_t
  369. u8_mbsnlen (const uint8_t *s, size_t n);
  370. extern size_t
  371. u16_mbsnlen (const uint16_t *s, size_t n);
  372. extern size_t
  373. u32_mbsnlen (const uint32_t *s, size_t n);
  374. /* Elementary string functions with memory allocation. */
  375. /* Make a freshly allocated copy of S, of length N. */
  376. extern uint8_t *
  377. u8_cpy_alloc (const uint8_t *s, size_t n);
  378. extern uint16_t *
  379. u16_cpy_alloc (const uint16_t *s, size_t n);
  380. extern uint32_t *
  381. u32_cpy_alloc (const uint32_t *s, size_t n);
  382. /* Elementary string functions on NUL terminated strings. */
  383. /* Return the length (number of units) of the first character in S.
  384. Return 0 if it is the NUL character. Return -1 upon failure. */
  385. extern int
  386. u8_strmblen (const uint8_t *s);
  387. extern int
  388. u16_strmblen (const uint16_t *s);
  389. extern int
  390. u32_strmblen (const uint32_t *s);
  391. /* Return the length (number of units) of the first character in S, putting
  392. its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
  393. character. Return -1 upon failure. */
  394. extern int
  395. u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
  396. extern int
  397. u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
  398. extern int
  399. u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
  400. /* Forward iteration step. Advances the pointer past the next character,
  401. or returns NULL if the end of the string has been reached. Puts the
  402. character's 'ucs4_t' representation in *PUC. */
  403. extern const uint8_t *
  404. u8_next (ucs4_t *puc, const uint8_t *s);
  405. extern const uint16_t *
  406. u16_next (ucs4_t *puc, const uint16_t *s);
  407. extern const uint32_t *
  408. u32_next (ucs4_t *puc, const uint32_t *s);
  409. /* Backward iteration step. Advances the pointer to point to the previous
  410. character, or returns NULL if the beginning of the string had been reached.
  411. Puts the character's 'ucs4_t' representation in *PUC. */
  412. extern const uint8_t *
  413. u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
  414. extern const uint16_t *
  415. u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
  416. extern const uint32_t *
  417. u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
  418. /* Return the number of units in S. */
  419. /* Similar to strlen(), wcslen(). */
  420. extern size_t
  421. u8_strlen (const uint8_t *s);
  422. extern size_t
  423. u16_strlen (const uint16_t *s);
  424. extern size_t
  425. u32_strlen (const uint32_t *s);
  426. /* Return the number of units in S, but at most MAXLEN. */
  427. /* Similar to strnlen(), wcsnlen(). */
  428. extern size_t
  429. u8_strnlen (const uint8_t *s, size_t maxlen);
  430. extern size_t
  431. u16_strnlen (const uint16_t *s, size_t maxlen);
  432. extern size_t
  433. u32_strnlen (const uint32_t *s, size_t maxlen);
  434. /* Copy SRC to DEST. */
  435. /* Similar to strcpy(), wcscpy(). */
  436. extern uint8_t *
  437. u8_strcpy (uint8_t *dest, const uint8_t *src);
  438. extern uint16_t *
  439. u16_strcpy (uint16_t *dest, const uint16_t *src);
  440. extern uint32_t *
  441. u32_strcpy (uint32_t *dest, const uint32_t *src);
  442. /* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
  443. /* Similar to stpcpy(). */
  444. extern uint8_t *
  445. u8_stpcpy (uint8_t *dest, const uint8_t *src);
  446. extern uint16_t *
  447. u16_stpcpy (uint16_t *dest, const uint16_t *src);
  448. extern uint32_t *
  449. u32_stpcpy (uint32_t *dest, const uint32_t *src);
  450. /* Copy no more than N units of SRC to DEST. */
  451. /* Similar to strncpy(), wcsncpy(). */
  452. extern uint8_t *
  453. u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n);
  454. extern uint16_t *
  455. u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n);
  456. extern uint32_t *
  457. u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n);
  458. /* Copy no more than N units of SRC to DEST. Return a pointer past the last
  459. non-NUL unit written into DEST. */
  460. /* Similar to stpncpy(). */
  461. extern uint8_t *
  462. u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n);
  463. extern uint16_t *
  464. u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n);
  465. extern uint32_t *
  466. u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n);
  467. /* Append SRC onto DEST. */
  468. /* Similar to strcat(), wcscat(). */
  469. extern uint8_t *
  470. u8_strcat (uint8_t *dest, const uint8_t *src);
  471. extern uint16_t *
  472. u16_strcat (uint16_t *dest, const uint16_t *src);
  473. extern uint32_t *
  474. u32_strcat (uint32_t *dest, const uint32_t *src);
  475. /* Append no more than N units of SRC onto DEST. */
  476. /* Similar to strncat(), wcsncat(). */
  477. extern uint8_t *
  478. u8_strncat (uint8_t *dest, const uint8_t *src, size_t n);
  479. extern uint16_t *
  480. u16_strncat (uint16_t *dest, const uint16_t *src, size_t n);
  481. extern uint32_t *
  482. u32_strncat (uint32_t *dest, const uint32_t *src, size_t n);
  483. /* Compare S1 and S2. */
  484. /* Similar to strcmp(), wcscmp(). */
  485. #ifdef __sun
  486. /* Avoid a collision with the u8_strcmp() function in Solaris 11 libc. */
  487. extern int
  488. u8_strcmp_gnu (const uint8_t *s1, const uint8_t *s2);
  489. # define u8_strcmp u8_strcmp_gnu
  490. #else
  491. extern int
  492. u8_strcmp (const uint8_t *s1, const uint8_t *s2);
  493. #endif
  494. extern int
  495. u16_strcmp (const uint16_t *s1, const uint16_t *s2);
  496. extern int
  497. u32_strcmp (const uint32_t *s1, const uint32_t *s2);
  498. /* Compare S1 and S2 using the collation rules of the current locale.
  499. Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
  500. Upon failure, set errno and return any value. */
  501. /* Similar to strcoll(), wcscoll(). */
  502. extern int
  503. u8_strcoll (const uint8_t *s1, const uint8_t *s2);
  504. extern int
  505. u16_strcoll (const uint16_t *s1, const uint16_t *s2);
  506. extern int
  507. u32_strcoll (const uint32_t *s1, const uint32_t *s2);
  508. /* Compare no more than N units of S1 and S2. */
  509. /* Similar to strncmp(), wcsncmp(). */
  510. extern int
  511. u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n);
  512. extern int
  513. u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n);
  514. extern int
  515. u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n);
  516. /* Duplicate S, returning an identical malloc'd string. */
  517. /* Similar to strdup(), wcsdup(). */
  518. extern uint8_t *
  519. u8_strdup (const uint8_t *s);
  520. extern uint16_t *
  521. u16_strdup (const uint16_t *s);
  522. extern uint32_t *
  523. u32_strdup (const uint32_t *s);
  524. /* Find the first occurrence of UC in STR. */
  525. /* Similar to strchr(), wcschr(). */
  526. extern uint8_t *
  527. u8_strchr (const uint8_t *str, ucs4_t uc);
  528. extern uint16_t *
  529. u16_strchr (const uint16_t *str, ucs4_t uc);
  530. extern uint32_t *
  531. u32_strchr (const uint32_t *str, ucs4_t uc);
  532. /* Find the last occurrence of UC in STR. */
  533. /* Similar to strrchr(), wcsrchr(). */
  534. extern uint8_t *
  535. u8_strrchr (const uint8_t *str, ucs4_t uc);
  536. extern uint16_t *
  537. u16_strrchr (const uint16_t *str, ucs4_t uc);
  538. extern uint32_t *
  539. u32_strrchr (const uint32_t *str, ucs4_t uc);
  540. /* Return the length of the initial segment of STR which consists entirely
  541. of Unicode characters not in REJECT. */
  542. /* Similar to strcspn(), wcscspn(). */
  543. extern size_t
  544. u8_strcspn (const uint8_t *str, const uint8_t *reject);
  545. extern size_t
  546. u16_strcspn (const uint16_t *str, const uint16_t *reject);
  547. extern size_t
  548. u32_strcspn (const uint32_t *str, const uint32_t *reject);
  549. /* Return the length of the initial segment of STR which consists entirely
  550. of Unicode characters in ACCEPT. */
  551. /* Similar to strspn(), wcsspn(). */
  552. extern size_t
  553. u8_strspn (const uint8_t *str, const uint8_t *accept);
  554. extern size_t
  555. u16_strspn (const uint16_t *str, const uint16_t *accept);
  556. extern size_t
  557. u32_strspn (const uint32_t *str, const uint32_t *accept);
  558. /* Find the first occurrence in STR of any character in ACCEPT. */
  559. /* Similar to strpbrk(), wcspbrk(). */
  560. extern uint8_t *
  561. u8_strpbrk (const uint8_t *str, const uint8_t *accept);
  562. extern uint16_t *
  563. u16_strpbrk (const uint16_t *str, const uint16_t *accept);
  564. extern uint32_t *
  565. u32_strpbrk (const uint32_t *str, const uint32_t *accept);
  566. /* Find the first occurrence of NEEDLE in HAYSTACK. */
  567. /* Similar to strstr(), wcsstr(). */
  568. extern uint8_t *
  569. u8_strstr (const uint8_t *haystack, const uint8_t *needle);
  570. extern uint16_t *
  571. u16_strstr (const uint16_t *haystack, const uint16_t *needle);
  572. extern uint32_t *
  573. u32_strstr (const uint32_t *haystack, const uint32_t *needle);
  574. /* Test whether STR starts with PREFIX. */
  575. extern bool
  576. u8_startswith (const uint8_t *str, const uint8_t *prefix);
  577. extern bool
  578. u16_startswith (const uint16_t *str, const uint16_t *prefix);
  579. extern bool
  580. u32_startswith (const uint32_t *str, const uint32_t *prefix);
  581. /* Test whether STR ends with SUFFIX. */
  582. extern bool
  583. u8_endswith (const uint8_t *str, const uint8_t *suffix);
  584. extern bool
  585. u16_endswith (const uint16_t *str, const uint16_t *suffix);
  586. extern bool
  587. u32_endswith (const uint32_t *str, const uint32_t *suffix);
  588. /* Divide STR into tokens separated by characters in DELIM.
  589. This interface is actually more similar to wcstok than to strtok. */
  590. /* Similar to strtok_r(), wcstok(). */
  591. extern uint8_t *
  592. u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr);
  593. extern uint16_t *
  594. u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr);
  595. extern uint32_t *
  596. u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr);
  597. #ifdef __cplusplus
  598. }
  599. #endif
  600. #endif /* _UNISTR_H */