printf-parse.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. /* Formatted output to strings.
  2. Copyright (C) 1999-2000, 2002-2003, 2006-2012 Free Software Foundation, Inc.
  3. This program is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU Lesser General Public License as published by
  5. the Free Software Foundation; either version 2, or (at your option)
  6. any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License along
  12. with this program; if not, write to the Free Software Foundation,
  13. Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  14. /* This file can be parametrized with the following macros:
  15. CHAR_T The element type of the format string.
  16. CHAR_T_ONLY_ASCII Set to 1 to enable verification that all characters
  17. in the format string are ASCII.
  18. DIRECTIVE Structure denoting a format directive.
  19. Depends on CHAR_T.
  20. DIRECTIVES Structure denoting the set of format directives of a
  21. format string. Depends on CHAR_T.
  22. PRINTF_PARSE Function that parses a format string.
  23. Depends on CHAR_T.
  24. STATIC Set to 'static' to declare the function static.
  25. ENABLE_UNISTDIO Set to 1 to enable the unistdio extensions. */
  26. #ifndef PRINTF_PARSE
  27. # include <config.h>
  28. #endif
  29. /* Specification. */
  30. #ifndef PRINTF_PARSE
  31. # include "printf-parse.h"
  32. #endif
  33. /* Default parameters. */
  34. #ifndef PRINTF_PARSE
  35. # define PRINTF_PARSE printf_parse
  36. # define CHAR_T char
  37. # define DIRECTIVE char_directive
  38. # define DIRECTIVES char_directives
  39. #endif
  40. /* Get size_t, NULL. */
  41. #include <stddef.h>
  42. /* Get intmax_t. */
  43. #if defined IN_LIBINTL || defined IN_LIBASPRINTF
  44. # if HAVE_STDINT_H_WITH_UINTMAX
  45. # include <stdint.h>
  46. # endif
  47. # if HAVE_INTTYPES_H_WITH_UINTMAX
  48. # include <inttypes.h>
  49. # endif
  50. #else
  51. # include <stdint.h>
  52. #endif
  53. /* malloc(), realloc(), free(). */
  54. #include <stdlib.h>
  55. /* memcpy(). */
  56. #include <string.h>
  57. /* errno. */
  58. #include <errno.h>
  59. /* Checked size_t computations. */
  60. #include "xsize.h"
  61. #if CHAR_T_ONLY_ASCII
  62. /* c_isascii(). */
  63. # include "c-ctype.h"
  64. #endif
  /* Parse the printf-style format string FORMAT.
     Every '%' directive found is stored in d->dir (d->count of them), and the
     type needed for each argument that the directives consume is recorded in
     a->arg (a->count of them).  Both arrays start out in the direct_alloc_*
     buffers embedded in D and A and are moved to malloc'ed memory once they
     outgrow those buffers.  The longest width text and precision text seen
     are stored in d->max_width_length and d->max_precision_length.
     Returns 0 on success.  Returns -1 with errno = EINVAL when the format
     string is rejected, and -1 with errno = ENOMEM when an array cannot be
     grown; on both failure paths a malloc'ed d->dir / a->arg is freed here.
     NOTE(review): on success the caller presumably has to free d->dir and
     a->arg when they no longer point at the direct_alloc_* buffers -- confirm
     against printf-parse.h.  */
  65. #ifdef STATIC
  66. STATIC
  67. #endif
  68. int
  69. PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
  70. {
  71. const CHAR_T *cp = format; /* pointer into format */
  72. size_t arg_posn = 0; /* number of regular arguments consumed */
  73. size_t d_allocated; /* allocated elements of d->dir */
  74. size_t a_allocated; /* allocated elements of a->arg */
  75. size_t max_width_length = 0;
  76. size_t max_precision_length = 0;
  77. d->count = 0;
  78. d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
  79. d->dir = d->direct_alloc_dir;
  80. a->count = 0;
  81. a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
  82. a->arg = a->direct_alloc_arg;
  /* REGISTER_ARG (index, type): note that argument number INDEX (0-based) has
     type TYPE.  Grows a->arg when INDEX does not fit, marks every slot that
     becomes newly counted as TYPE_NONE, and jumps to 'error' if the argument
     was already registered with a different type, or to 'out_of_memory' if
     the array cannot be grown.  It expands to a bare block that relies on
     a_allocated and on the labels of this function, so it is only usable
     here.  */
  83. #define REGISTER_ARG(_index_,_type_) \
  84. { \
  85. size_t n = (_index_); \
  86. if (n >= a_allocated) \
  87. { \
  88. size_t memory_size; \
  89. argument *memory; \
  90. \
  91. a_allocated = xtimes (a_allocated, 2); \
  92. if (a_allocated <= n) \
  93. a_allocated = xsum (n, 1); \
  94. memory_size = xtimes (a_allocated, sizeof (argument)); \
  95. if (size_overflow_p (memory_size)) \
  96. /* Overflow, would lead to out of memory. */ \
  97. goto out_of_memory; \
  98. memory = (argument *) (a->arg != a->direct_alloc_arg \
  99. ? realloc (a->arg, memory_size) \
  100. : malloc (memory_size)); \
  101. if (memory == NULL) \
  102. /* Out of memory. */ \
  103. goto out_of_memory; \
  104. if (a->arg == a->direct_alloc_arg) \
  105. memcpy (memory, a->arg, a->count * sizeof (argument)); \
  106. a->arg = memory; \
  107. } \
  108. while (a->count <= n) \
  109. a->arg[a->count++].type = TYPE_NONE; \
  110. if (a->arg[n].type == TYPE_NONE) \
  111. a->arg[n].type = (_type_); \
  112. else if (a->arg[n].type != (_type_)) \
  113. /* Ambiguous type for positional argument. */ \
  114. goto error; \
  115. }
  /* Walk the format string; each '%' starts a directive, every other
     character is skipped (after the optional ASCII check).  */
  116. while (*cp != '\0')
  117. {
  118. CHAR_T c = *cp++;
  119. if (c == '%')
  120. {
  121. size_t arg_index = ARG_NONE;
  122. DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
  123. /* Initialize the next directive. */
  124. dp->dir_start = cp - 1;
  125. dp->flags = 0;
  126. dp->width_start = NULL;
  127. dp->width_end = NULL;
  128. dp->width_arg_index = ARG_NONE;
  129. dp->precision_start = NULL;
  130. dp->precision_end = NULL;
  131. dp->precision_arg_index = ARG_NONE;
  132. dp->arg_index = ARG_NONE;
  133. /* Test for positional argument. */
  134. if (*cp >= '0' && *cp <= '9')
  135. {
  136. const CHAR_T *np;
  /* Look ahead first: the digits are a position only when followed by '$';
     otherwise cp stays put and they are parsed as flag/width below.  */
  137. for (np = cp; *np >= '0' && *np <= '9'; np++)
  138. ;
  139. if (*np == '$')
  140. {
  141. size_t n = 0;
  142. for (np = cp; *np >= '0' && *np <= '9'; np++)
  143. n = xsum (xtimes (n, 10), *np - '0');
  144. if (n == 0)
  145. /* Positional argument 0. */
  146. goto error;
  147. if (size_overflow_p (n))
  148. /* n too large, would lead to out of memory later. */
  149. goto error;
  150. arg_index = n - 1;
  151. cp = np + 1;
  152. }
  153. }
  154. /* Read the flags. */
  155. for (;;)
  156. {
  157. if (*cp == '\'')
  158. {
  159. dp->flags |= FLAG_GROUP;
  160. cp++;
  161. }
  162. else if (*cp == '-')
  163. {
  164. dp->flags |= FLAG_LEFT;
  165. cp++;
  166. }
  167. else if (*cp == '+')
  168. {
  169. dp->flags |= FLAG_SHOWSIGN;
  170. cp++;
  171. }
  172. else if (*cp == ' ')
  173. {
  174. dp->flags |= FLAG_SPACE;
  175. cp++;
  176. }
  177. else if (*cp == '#')
  178. {
  179. dp->flags |= FLAG_ALT;
  180. cp++;
  181. }
  182. else if (*cp == '0')
  183. {
  184. dp->flags |= FLAG_ZERO;
  185. cp++;
  186. }
  187. #if __GLIBC__ >= 2 && !defined __UCLIBC__
  188. else if (*cp == 'I')
  189. {
  190. dp->flags |= FLAG_LOCALIZED;
  191. cp++;
  192. }
  193. #endif
  194. else
  195. break;
  196. }
  197. /* Parse the field width. */
  198. if (*cp == '*')
  199. {
  200. dp->width_start = cp;
  201. cp++;
  202. dp->width_end = cp;
  /* A '*' width spans one character; an "N$" that may follow is not part
     of the range width_start..width_end.  */
  203. if (max_width_length < 1)
  204. max_width_length = 1;
  205. /* Test for positional argument. */
  206. if (*cp >= '0' && *cp <= '9')
  207. {
  208. const CHAR_T *np;
  209. for (np = cp; *np >= '0' && *np <= '9'; np++)
  210. ;
  211. if (*np == '$')
  212. {
  213. size_t n = 0;
  214. for (np = cp; *np >= '0' && *np <= '9'; np++)
  215. n = xsum (xtimes (n, 10), *np - '0');
  216. if (n == 0)
  217. /* Positional argument 0. */
  218. goto error;
  219. if (size_overflow_p (n))
  220. /* n too large, would lead to out of memory later. */
  221. goto error;
  222. dp->width_arg_index = n - 1;
  223. cp = np + 1;
  224. }
  225. }
  /* Without an explicit position the width takes the next regular argument.  */
  226. if (dp->width_arg_index == ARG_NONE)
  227. {
  228. dp->width_arg_index = arg_posn++;
  229. if (dp->width_arg_index == ARG_NONE)
  230. /* arg_posn wrapped around. */
  231. goto error;
  232. }
  233. REGISTER_ARG (dp->width_arg_index, TYPE_INT);
  234. }
  235. else if (*cp >= '0' && *cp <= '9')
  236. {
  237. size_t width_length;
  238. dp->width_start = cp;
  239. for (; *cp >= '0' && *cp <= '9'; cp++)
  240. ;
  241. dp->width_end = cp;
  242. width_length = dp->width_end - dp->width_start;
  243. if (max_width_length < width_length)
  244. max_width_length = width_length;
  245. }
  246. /* Parse the precision. */
  247. if (*cp == '.')
  248. {
  249. cp++;
  250. if (*cp == '*')
  251. {
  252. dp->precision_start = cp - 1;
  253. cp++;
  254. dp->precision_end = cp;
  /* ".*" spans two characters because precision_start points at the '.'.  */
  255. if (max_precision_length < 2)
  256. max_precision_length = 2;
  257. /* Test for positional argument. */
  258. if (*cp >= '0' && *cp <= '9')
  259. {
  260. const CHAR_T *np;
  261. for (np = cp; *np >= '0' && *np <= '9'; np++)
  262. ;
  263. if (*np == '$')
  264. {
  265. size_t n = 0;
  266. for (np = cp; *np >= '0' && *np <= '9'; np++)
  267. n = xsum (xtimes (n, 10), *np - '0');
  268. if (n == 0)
  269. /* Positional argument 0. */
  270. goto error;
  271. if (size_overflow_p (n))
  272. /* n too large, would lead to out of memory
  273. later. */
  274. goto error;
  275. dp->precision_arg_index = n - 1;
  276. cp = np + 1;
  277. }
  278. }
  /* Without an explicit position the precision takes the next regular
     argument.  */
  279. if (dp->precision_arg_index == ARG_NONE)
  280. {
  281. dp->precision_arg_index = arg_posn++;
  282. if (dp->precision_arg_index == ARG_NONE)
  283. /* arg_posn wrapped around. */
  284. goto error;
  285. }
  286. REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
  287. }
  288. else
  289. {
  290. size_t precision_length;
  /* Literal precision: the recorded range starts at the '.' and may contain
     no digits at all.  */
  291. dp->precision_start = cp - 1;
  292. for (; *cp >= '0' && *cp <= '9'; cp++)
  293. ;
  294. dp->precision_end = cp;
  295. precision_length = dp->precision_end - dp->precision_start;
  296. if (max_precision_length < precision_length)
  297. max_precision_length = precision_length;
  298. }
  299. }
  300. {
  301. arg_type type;
  302. /* Parse argument type/size specifiers. */
  303. {
  /* Accumulated size modifiers: the first 'h' sets bit 0 and a second 'h'
     sets bit 1; 'L' sets 4; every 'l' adds 8.  The conversion cases below
     test flags >= 8 for "long" and flags >= 16 for "long long".  'j',
     'z'/'Z' and 't' (plus the platform-specific 'q' and "I64") add 8 or 16
     depending on how the size of the corresponding type compares with int
     and long.  */
  304. int flags = 0;
  305. for (;;)
  306. {
  307. if (*cp == 'h')
  308. {
  309. flags |= (1 << (flags & 1));
  310. cp++;
  311. }
  312. else if (*cp == 'L')
  313. {
  314. flags |= 4;
  315. cp++;
  316. }
  317. else if (*cp == 'l')
  318. {
  319. flags += 8;
  320. cp++;
  321. }
  322. else if (*cp == 'j')
  323. {
  324. if (sizeof (intmax_t) > sizeof (long))
  325. {
  326. /* intmax_t = long long */
  327. flags += 16;
  328. }
  329. else if (sizeof (intmax_t) > sizeof (int))
  330. {
  331. /* intmax_t = long */
  332. flags += 8;
  333. }
  334. cp++;
  335. }
  336. else if (*cp == 'z' || *cp == 'Z')
  337. {
  338. /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
  339. because the warning facility in gcc-2.95.2 understands
  340. only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
  341. if (sizeof (size_t) > sizeof (long))
  342. {
  343. /* size_t = long long */
  344. flags += 16;
  345. }
  346. else if (sizeof (size_t) > sizeof (int))
  347. {
  348. /* size_t = long */
  349. flags += 8;
  350. }
  351. cp++;
  352. }
  353. else if (*cp == 't')
  354. {
  355. if (sizeof (ptrdiff_t) > sizeof (long))
  356. {
  357. /* ptrdiff_t = long long */
  358. flags += 16;
  359. }
  360. else if (sizeof (ptrdiff_t) > sizeof (int))
  361. {
  362. /* ptrdiff_t = long */
  363. flags += 8;
  364. }
  365. cp++;
  366. }
  367. #if defined __APPLE__ && defined __MACH__
  368. /* On MacOS X 10.3, PRIdMAX is defined as "qd".
  369. We cannot change it to "lld" because PRIdMAX must also
  370. be understood by the system's printf routines. */
  371. else if (*cp == 'q')
  372. {
  373. if (64 / 8 > sizeof (long))
  374. {
  375. /* int64_t = long long */
  376. flags += 16;
  377. }
  378. else
  379. {
  380. /* int64_t = long */
  381. flags += 8;
  382. }
  383. cp++;
  384. }
  385. #endif
  386. #if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
  387. /* On native Windows, PRIdMAX is defined as "I64d".
  388. We cannot change it to "lld" because PRIdMAX must also
  389. be understood by the system's printf routines. */
  390. else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
  391. {
  392. if (64 / 8 > sizeof (long))
  393. {
  394. /* __int64 = long long */
  395. flags += 16;
  396. }
  397. else
  398. {
  399. /* __int64 = long */
  400. flags += 8;
  401. }
  402. cp += 3;
  403. }
  404. #endif
  405. else
  406. break;
  407. }
  408. /* Read the conversion character. */
  /* A format string that ends inside a directive yields c == '\0' here and
     is rejected by the default case.  */
  409. c = *cp++;
  410. switch (c)
  411. {
  412. case 'd': case 'i':
  413. #if HAVE_LONG_LONG_INT
  414. /* If 'long long' exists and is larger than 'long': */
  415. if (flags >= 16 || (flags & 4))
  416. type = TYPE_LONGLONGINT;
  417. else
  418. #endif
  419. /* If 'long long' exists and is the same as 'long', we parse
  420. "lld" into TYPE_LONGINT. */
  421. if (flags >= 8)
  422. type = TYPE_LONGINT;
  423. else if (flags & 2)
  424. type = TYPE_SCHAR;
  425. else if (flags & 1)
  426. type = TYPE_SHORT;
  427. else
  428. type = TYPE_INT;
  429. break;
  430. case 'o': case 'u': case 'x': case 'X':
  431. #if HAVE_LONG_LONG_INT
  432. /* If 'long long' exists and is larger than 'long': */
  433. if (flags >= 16 || (flags & 4))
  434. type = TYPE_ULONGLONGINT;
  435. else
  436. #endif
  437. /* If 'unsigned long long' exists and is the same as
  438. 'unsigned long', we parse "llu" into TYPE_ULONGINT. */
  439. if (flags >= 8)
  440. type = TYPE_ULONGINT;
  441. else if (flags & 2)
  442. type = TYPE_UCHAR;
  443. else if (flags & 1)
  444. type = TYPE_USHORT;
  445. else
  446. type = TYPE_UINT;
  447. break;
  448. case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
  449. case 'a': case 'A':
  450. if (flags >= 16 || (flags & 4))
  451. type = TYPE_LONGDOUBLE;
  452. else
  453. type = TYPE_DOUBLE;
  454. break;
  455. case 'c':
  456. if (flags >= 8)
  457. #if HAVE_WINT_T
  458. type = TYPE_WIDE_CHAR;
  459. #else
  460. goto error;
  461. #endif
  462. else
  463. type = TYPE_CHAR;
  464. break;
  465. #if HAVE_WINT_T
  466. case 'C':
  467. type = TYPE_WIDE_CHAR;
  /* Stored as conversion 'c'; the wide argument type carries the difference.  */
  468. c = 'c';
  469. break;
  470. #endif
  471. case 's':
  472. if (flags >= 8)
  473. #if HAVE_WCHAR_T
  474. type = TYPE_WIDE_STRING;
  475. #else
  476. goto error;
  477. #endif
  478. else
  479. type = TYPE_STRING;
  480. break;
  481. #if HAVE_WCHAR_T
  482. case 'S':
  483. type = TYPE_WIDE_STRING;
  /* Stored as conversion 's'; the wide argument type carries the difference.  */
  484. c = 's';
  485. break;
  486. #endif
  487. case 'p':
  488. type = TYPE_POINTER;
  489. break;
  490. case 'n':
  491. #if HAVE_LONG_LONG_INT
  492. /* If 'long long' exists and is larger than 'long': */
  493. if (flags >= 16 || (flags & 4))
  494. type = TYPE_COUNT_LONGLONGINT_POINTER;
  495. else
  496. #endif
  497. /* If 'long long' exists and is the same as 'long', we parse
  498. "lln" into TYPE_COUNT_LONGINT_POINTER. */
  499. if (flags >= 8)
  500. type = TYPE_COUNT_LONGINT_POINTER;
  501. else if (flags & 2)
  502. type = TYPE_COUNT_SCHAR_POINTER;
  503. else if (flags & 1)
  504. type = TYPE_COUNT_SHORT_POINTER;
  505. else
  506. type = TYPE_COUNT_INT_POINTER;
  507. break;
  508. #if ENABLE_UNISTDIO
  509. /* The unistdio extensions. */
  510. case 'U':
  511. if (flags >= 16)
  512. type = TYPE_U32_STRING;
  513. else if (flags >= 8)
  514. type = TYPE_U16_STRING;
  515. else
  516. type = TYPE_U8_STRING;
  517. break;
  518. #endif
  519. case '%':
  520. type = TYPE_NONE;
  521. break;
  522. default:
  523. /* Unknown conversion character. */
  524. goto error;
  525. }
  526. }
  /* Only "%%" yields TYPE_NONE; it consumes no argument, so nothing is
     registered for it.  */
  527. if (type != TYPE_NONE)
  528. {
  529. dp->arg_index = arg_index;
  530. if (dp->arg_index == ARG_NONE)
  531. {
  532. dp->arg_index = arg_posn++;
  533. if (dp->arg_index == ARG_NONE)
  534. /* arg_posn wrapped around. */
  535. goto error;
  536. }
  537. REGISTER_ARG (dp->arg_index, type);
  538. }
  539. dp->conversion = c;
  540. dp->dir_end = cp;
  541. }
  /* The directive is complete.  Always keep one free entry behind it: the
     entry following the last directive is written after the loop.  dp is not
     used past this point, so the array may be moved by the reallocation.  */
  542. d->count++;
  543. if (d->count >= d_allocated)
  544. {
  545. size_t memory_size;
  546. DIRECTIVE *memory;
  547. d_allocated = xtimes (d_allocated, 2);
  548. memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
  549. if (size_overflow_p (memory_size))
  550. /* Overflow, would lead to out of memory. */
  551. goto out_of_memory;
  552. memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
  553. ? realloc (d->dir, memory_size)
  554. : malloc (memory_size));
  555. if (memory == NULL)
  556. /* Out of memory. */
  557. goto out_of_memory;
  558. if (d->dir == d->direct_alloc_dir)
  559. memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
  560. d->dir = memory;
  561. }
  562. }
  563. #if CHAR_T_ONLY_ASCII
  564. else if (!c_isascii (c))
  565. {
  566. /* Non-ASCII character. Not supported. */
  567. goto error;
  568. }
  569. #endif
  570. }
  /* cp now points at the terminating NUL; store it in the entry after the
     last directive so that the end of the format string is known.  */
  571. d->dir[d->count].dir_start = cp;
  572. d->max_width_length = max_width_length;
  573. d->max_precision_length = max_precision_length;
  574. return 0;
  /* Invalid format string: release any malloc'ed arrays and report EINVAL.  */
  575. error:
  576. if (a->arg != a->direct_alloc_arg)
  577. free (a->arg);
  578. if (d->dir != d->direct_alloc_dir)
  579. free (d->dir);
  580. errno = EINVAL;
  581. return -1;
  /* Allocation failure or size overflow: same cleanup, but report ENOMEM.  */
  582. out_of_memory:
  583. if (a->arg != a->direct_alloc_arg)
  584. free (a->arg);
  585. if (d->dir != d->direct_alloc_dir)
  586. free (d->dir);
  587. errno = ENOMEM;
  588. return -1;
  589. }
  /* Drop the parameter macros used above.  NOTE(review): presumably this lets
     an including file define them afresh before including this file again --
     confirm against the files that include it.  */
  590. #undef PRINTF_PARSE
  591. #undef DIRECTIVES
  592. #undef DIRECTIVE
  593. #undef CHAR_T_ONLY_ASCII
  594. #undef CHAR_T