regex.m4 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. # serial 71
  2. # Copyright (C) 1996-2001, 2003-2021 Free Software Foundation, Inc.
  3. #
  4. # This file is free software; the Free Software Foundation
  5. # gives unlimited permission to copy and/or distribute it,
  6. # with or without modifications, as long as this notice is preserved.
  7. dnl Initially derived from code in GNU grep.
  8. dnl Mostly written by Jim Meyering.
  9. AC_PREREQ([2.50])
  10. AC_DEFUN([gl_REGEX],
  11. [
  12. AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
  13. AC_ARG_WITH([included-regex],
  14. [AS_HELP_STRING([--without-included-regex],
  15. [don't compile regex; this is the default on systems
  16. with recent-enough versions of the GNU C Library
  17. (use with caution on other systems).])])
  18. case $with_included_regex in #(
  19. yes|no) ac_use_included_regex=$with_included_regex
  20. ;;
  21. '')
  22. # If the system regex support is good enough that it passes the
  23. # following run test, then default to *not* using the included regex.c.
  24. # If cross compiling, assume the test would fail and use the included
  25. # regex.c.
  26. AC_CHECK_DECLS_ONCE([alarm])
  27. AC_CHECK_HEADERS_ONCE([malloc.h])
  28. AC_CACHE_CHECK([for working re_compile_pattern],
  29. [gl_cv_func_re_compile_pattern_working],
  30. [AC_RUN_IFELSE(
  31. [AC_LANG_PROGRAM(
  32. [[#include <regex.h>
  33. #include <locale.h>
  34. #include <limits.h>
  35. #include <string.h>
  36. #if defined M_CHECK_ACTION || HAVE_DECL_ALARM
  37. # include <signal.h>
  38. # include <unistd.h>
  39. #endif
  40. #if HAVE_MALLOC_H
  41. # include <malloc.h>
  42. #endif
  43. #ifdef M_CHECK_ACTION
  44. /* Exit with distinguishable exit code. */
  45. static void sigabrt_no_core (int sig) { raise (SIGTERM); }
  46. #endif
  47. ]],
  48. [[int result = 0;
  49. static struct re_pattern_buffer regex;
  50. unsigned char folded_chars[UCHAR_MAX + 1];
  51. int i;
  52. const char *s;
  53. struct re_registers regs;
  54. /* Some builds of glibc go into an infinite loop on this
  55. test. Use alarm to force death, and mallopt to avoid
  56. malloc recursion in diagnosing the corrupted heap. */
  57. #if HAVE_DECL_ALARM
  58. signal (SIGALRM, SIG_DFL);
  59. alarm (2);
  60. #endif
  61. #ifdef M_CHECK_ACTION
  62. signal (SIGABRT, sigabrt_no_core);
  63. mallopt (M_CHECK_ACTION, 2);
  64. #endif
  65. if (setlocale (LC_ALL, "en_US.UTF-8"))
  66. {
  67. {
  68. /* https://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
  69. This test needs valgrind to catch the bug on Debian
  70. GNU/Linux 3.1 x86, but it might catch the bug better
  71. on other platforms and it shouldn't hurt to try the
  72. test here. */
  73. static char const pat[] = "insert into";
  74. static char const data[] =
  75. "\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
  76. re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
  77. | RE_ICASE);
  78. memset (&regex, 0, sizeof regex);
  79. s = re_compile_pattern (pat, sizeof pat - 1, &regex);
  80. if (s)
  81. result |= 1;
  82. else
  83. {
  84. if (re_search (&regex, data, sizeof data - 1,
  85. 0, sizeof data - 1, &regs)
  86. != -1)
  87. result |= 1;
  88. regfree (&regex);
  89. }
  90. }
  91. {
  92. /* This test is from glibc bug 15078.
  93. The test case is from Andreas Schwab in
  94. <https://sourceware.org/ml/libc-alpha/2013-01/msg00967.html>.
  95. */
  96. static char const pat[] = "[^x]x";
  97. static char const data[] =
  98. /* <U1000><U103B><U103D><U1014><U103A><U102F><U1015><U103A> */
  99. "\xe1\x80\x80"
  100. "\xe1\x80\xbb"
  101. "\xe1\x80\xbd"
  102. "\xe1\x80\x94"
  103. "\xe1\x80\xba"
  104. "\xe1\x80\xaf"
  105. "\xe1\x80\x95"
  106. "\xe1\x80\xba"
  107. "x";
  108. re_set_syntax (0);
  109. memset (&regex, 0, sizeof regex);
  110. s = re_compile_pattern (pat, sizeof pat - 1, &regex);
  111. if (s)
  112. result |= 1;
  113. else
  114. {
  115. i = re_search (&regex, data, sizeof data - 1,
  116. 0, sizeof data - 1, 0);
  117. if (i != 0 && i != 21)
  118. result |= 1;
  119. regfree (&regex);
  120. }
  121. }
  122. if (! setlocale (LC_ALL, "C"))
  123. return 1;
  124. }
  125. /* This test is from glibc bug 3957, reported by Andrew Mackey. */
  126. re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
  127. memset (&regex, 0, sizeof regex);
  128. s = re_compile_pattern ("a[^x]b", 6, &regex);
  129. if (s)
  130. result |= 2;
  131. else
  132. {
  133. /* This should fail, but succeeds for glibc-2.5. */
  134. if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
  135. result |= 2;
  136. regfree (&regex);
  137. }
  138. /* This regular expression is from Spencer ere test number 75
  139. in grep-2.3. */
  140. re_set_syntax (RE_SYNTAX_POSIX_EGREP);
  141. memset (&regex, 0, sizeof regex);
  142. for (i = 0; i <= UCHAR_MAX; i++)
  143. folded_chars[i] = i;
  144. regex.translate = folded_chars;
  145. s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
  146. /* This should fail with _Invalid character class name_ error. */
  147. if (!s)
  148. {
  149. result |= 4;
  150. regfree (&regex);
  151. }
  152. /* Ensure that [b-a] is diagnosed as invalid, when
  153. using RE_NO_EMPTY_RANGES. */
  154. re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
  155. memset (&regex, 0, sizeof regex);
  156. s = re_compile_pattern ("a[b-a]", 6, &regex);
  157. if (s == 0)
  158. {
  159. result |= 8;
  160. regfree (&regex);
  161. }
  162. /* This should succeed, but does not for glibc-2.1.3. */
  163. memset (&regex, 0, sizeof regex);
  164. s = re_compile_pattern ("{1", 2, &regex);
  165. if (s)
  166. result |= 8;
  167. else
  168. regfree (&regex);
  169. /* The following example is derived from a problem report
  170. against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>. */
  171. memset (&regex, 0, sizeof regex);
  172. s = re_compile_pattern ("[an\371]*n", 7, &regex);
  173. if (s)
  174. result |= 8;
  175. else
  176. {
  177. /* This should match, but does not for glibc-2.2.1. */
  178. if (re_match (&regex, "an", 2, 0, &regs) != 2)
  179. result |= 8;
  180. else
  181. {
  182. free (regs.start);
  183. free (regs.end);
  184. }
  185. regfree (&regex);
  186. }
  187. memset (&regex, 0, sizeof regex);
  188. s = re_compile_pattern ("x", 1, &regex);
  189. if (s)
  190. result |= 8;
  191. else
  192. {
  193. /* glibc-2.2.93 does not work with a negative RANGE argument. */
  194. if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
  195. result |= 8;
  196. else
  197. {
  198. free (regs.start);
  199. free (regs.end);
  200. }
  201. regfree (&regex);
  202. }
  203. /* The version of regex.c in older versions of gnulib
  204. ignored RE_ICASE. Detect that problem too. */
  205. re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
  206. memset (&regex, 0, sizeof regex);
  207. s = re_compile_pattern ("x", 1, &regex);
  208. if (s)
  209. result |= 16;
  210. else
  211. {
  212. if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
  213. result |= 16;
  214. else
  215. {
  216. free (regs.start);
  217. free (regs.end);
  218. }
  219. regfree (&regex);
  220. }
  221. /* Catch a bug reported by Vin Shelton in
  222. https://lists.gnu.org/r/bug-coreutils/2007-06/msg00089.html
  223. */
  224. re_set_syntax (RE_SYNTAX_POSIX_BASIC
  225. & ~RE_CONTEXT_INVALID_DUP
  226. & ~RE_NO_EMPTY_RANGES);
  227. memset (&regex, 0, sizeof regex);
  228. s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
  229. if (s)
  230. result |= 32;
  231. else
  232. regfree (&regex);
  233. /* REG_STARTEND was added to glibc on 2004-01-15.
  234. Reject older versions. */
  235. if (! REG_STARTEND)
  236. result |= 64;
  237. /* Matching with the compiled form of this regexp would provoke
  238. an assertion failure prior to glibc-2.28:
  239. regexec.c:1375: pop_fail_stack: Assertion 'num >= 0' failed
  240. With glibc-2.28, compilation fails and reports the invalid
  241. back reference. */
  242. re_set_syntax (RE_SYNTAX_POSIX_EGREP);
  243. memset (&regex, 0, sizeof regex);
  244. s = re_compile_pattern ("0|()0|\\1|0", 10, &regex);
  245. if (!s)
  246. result |= 64;
  247. else
  248. {
  249. if (strcmp (s, "Invalid back reference"))
  250. result |= 64;
  251. regfree (&regex);
  252. }
  253. #if 0
  254. /* It would be nice to reject hosts whose regoff_t values are too
  255. narrow (including glibc on hosts with 64-bit ptrdiff_t and
  256. 32-bit int), but we should wait until glibc implements this
  257. feature. Otherwise, support for equivalence classes and
  258. multibyte collation symbols would always be broken except
  259. when compiling --without-included-regex. */
  260. if (sizeof (regoff_t) < sizeof (ptrdiff_t)
  261. || sizeof (regoff_t) < sizeof (ssize_t))
  262. result |= 64;
  263. #endif
  264. return result;
  265. ]])],
  266. [gl_cv_func_re_compile_pattern_working=yes],
  267. [gl_cv_func_re_compile_pattern_working=no],
  268. [case "$host_os" in
  269. # Guess no on native Windows.
  270. mingw*) gl_cv_func_re_compile_pattern_working="guessing no" ;;
  271. # Otherwise obey --enable-cross-guesses.
  272. *) gl_cv_func_re_compile_pattern_working="$gl_cross_guess_normal" ;;
  273. esac
  274. ])
  275. ])
  276. case "$gl_cv_func_re_compile_pattern_working" in #(
  277. *yes) ac_use_included_regex=no;; #(
  278. *no) ac_use_included_regex=yes;;
  279. esac
  280. ;;
  281. *) AC_MSG_ERROR([Invalid value for --with-included-regex: $with_included_regex])
  282. ;;
  283. esac
  284. if test $ac_use_included_regex = yes; then
  285. AC_DEFINE([_REGEX_INCLUDE_LIMITS_H], [1],
  286. [Define if you want <regex.h> to include <limits.h>, so that it
  287. consistently overrides <limits.h>'s RE_DUP_MAX.])
  288. AC_DEFINE([_REGEX_LARGE_OFFSETS], [1],
  289. [Define if you want regoff_t to be at least as wide POSIX requires.])
  290. AC_DEFINE([re_syntax_options], [rpl_re_syntax_options],
  291. [Define to rpl_re_syntax_options if the replacement should be used.])
  292. AC_DEFINE([re_set_syntax], [rpl_re_set_syntax],
  293. [Define to rpl_re_set_syntax if the replacement should be used.])
  294. AC_DEFINE([re_compile_pattern], [rpl_re_compile_pattern],
  295. [Define to rpl_re_compile_pattern if the replacement should be used.])
  296. AC_DEFINE([re_compile_fastmap], [rpl_re_compile_fastmap],
  297. [Define to rpl_re_compile_fastmap if the replacement should be used.])
  298. AC_DEFINE([re_search], [rpl_re_search],
  299. [Define to rpl_re_search if the replacement should be used.])
  300. AC_DEFINE([re_search_2], [rpl_re_search_2],
  301. [Define to rpl_re_search_2 if the replacement should be used.])
  302. AC_DEFINE([re_match], [rpl_re_match],
  303. [Define to rpl_re_match if the replacement should be used.])
  304. AC_DEFINE([re_match_2], [rpl_re_match_2],
  305. [Define to rpl_re_match_2 if the replacement should be used.])
  306. AC_DEFINE([re_set_registers], [rpl_re_set_registers],
  307. [Define to rpl_re_set_registers if the replacement should be used.])
  308. AC_DEFINE([re_comp], [rpl_re_comp],
  309. [Define to rpl_re_comp if the replacement should be used.])
  310. AC_DEFINE([re_exec], [rpl_re_exec],
  311. [Define to rpl_re_exec if the replacement should be used.])
  312. AC_DEFINE([regcomp], [rpl_regcomp],
  313. [Define to rpl_regcomp if the replacement should be used.])
  314. AC_DEFINE([regexec], [rpl_regexec],
  315. [Define to rpl_regexec if the replacement should be used.])
  316. AC_DEFINE([regerror], [rpl_regerror],
  317. [Define to rpl_regerror if the replacement should be used.])
  318. AC_DEFINE([regfree], [rpl_regfree],
  319. [Define to rpl_regfree if the replacement should be used.])
  320. fi
  321. ])
# Prerequisites of lib/regex.c and lib/regex_internal.c.
# Takes no arguments.  It pulls in the compiler and type checks named
# below and then probes for one header, two functions and one declaration.
AC_DEFUN([gl_PREREQ_REGEX],
[
  dnl Macros that must have been expanded before this body runs.
  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
  AC_REQUIRE([AC_C_INLINE])
  AC_REQUIRE([AC_C_RESTRICT])
  AC_REQUIRE([AC_TYPE_MBSTATE_T])
  dnl NOTE(review): gl_EEMALLOC is not defined in this file; presumably it
  dnl comes from another gnulib m4 file.  Confirm it is available.
  AC_REQUIRE([gl_EEMALLOC])
  dnl Look for the <libintl.h> header.
  AC_CHECK_HEADERS([libintl.h])
  dnl Look for the isblank and iswctype functions.
  AC_CHECK_FUNCS_ONCE([isblank iswctype])
  dnl Check whether <ctype.h> declares isblank.
  AC_CHECK_DECLS([isblank], [], [], [[#include <ctype.h>]])
])