123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304 |
- /* Copyright (C) 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this software; see the file COPYING. If not, write to
- * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
- * Boston, MA 02111-1307 USA
- *
- * As a special exception, the Free Software Foundation gives permission
- * for additional uses of the text contained in its release of GUILE.
- *
- * The exception is that, if you link the GUILE library with other files
- * to produce an executable, this does not by itself cause the
- * resulting executable to be covered by the GNU General Public License.
- * Your use of that executable is in no way restricted on account of
- * linking the GUILE library code into it.
- *
- * This exception does not however invalidate any other reasons why
- * the executable file might be covered by the GNU General Public License.
- *
- * This exception applies only to the code released by the
- * Free Software Foundation under the name GUILE. If you copy
- * code from other Free Software Foundation releases into a copy of
- * GUILE, as the General Public License permits, the exception does
- * not apply to the code that you add in this way. To avoid misleading
- * anyone as to the status of such modified files, you must delete
- * this exception notice from them.
- *
- * If you write modifications of your own for GUILE, it is your choice
- * whether to permit this exception to apply to your modifications.
- * If you do not wish that, delete this exception notice.
- */
- /* regex-posix.c -- POSIX regular expression support.
- This code was written against Henry Spencer's famous regex package.
- The principal reference for POSIX behavior was the man page for this
- library, not the 1003.2 document itself. Ergo, other `POSIX'
- libraries which do not agree with the Spencer implementation may
- produce varying behavior. Sigh. */
- #include <stdio.h>
- #include <sys/types.h>
- #include "libguile/_scm.h"
- /* Supposedly, this file is never compiled unless we know we have
- POSIX regular expressions. But we still put this in an #ifdef so
- the file is CPP'able (for dependency scanning) even on systems that
- don't have a <regex.h> header. */
- #ifdef HAVE_REGCOMP
- #ifdef HAVE_REGEX_H
- #include <regex.h>
- #else
- #ifdef HAVE_RXPOSIX_H
- #include <rxposix.h> /* GNU Rx library */
- #else
- #ifdef HAVE_RX_RXPOSIX_H
- #include <rx/rxposix.h> /* GNU Rx library on Linux */
- #endif
- #endif
- #endif
- #endif
- #include "libguile/smob.h"
- #include "libguile/symbols.h"
- #include "libguile/vectors.h"
- #include "libguile/strports.h"
- #include "libguile/ports.h"
- #include "libguile/feature.h"
- #include "libguile/strings.h"
- #include "libguile/validate.h"
- #include "libguile/regex-posix.h"
- /* This is defined by some regex libraries and omitted by others. */
- #ifndef REG_BASIC
- #define REG_BASIC 0
- #endif
- long scm_tc16_regex;
- static scm_sizet
- free_regex (SCM obj)
- {
- regfree (SCM_RGX (obj));
- free (SCM_RGX (obj));
- return sizeof(regex_t);
- }
- SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax");
- static char *
- scm_regexp_error_msg (int regerrno, regex_t *rx)
- {
- SCM errmsg;
- int l;
- /* FIXME: must we wrap any external calls in SCM_DEFER_INTS...SCM_ALLOW_INTS?
- Or are these only necessary when a SCM object may be left in an
- undetermined state (half-formed)? If the latter then I believe we
- may do without the critical section code. -twp */
- /* We could simply make errmsg a char pointer, and allocate space with
- malloc. But since we are about to pass the pointer to scm_error, which
- never returns, we would never have the opportunity to free it. Creating
- it as a SCM object means that the system will GC it at some point. */
- errmsg = scm_make_string (SCM_MAKINUM (80), SCM_UNDEFINED);
- SCM_DEFER_INTS;
- l = regerror (regerrno, rx, SCM_CHARS (errmsg), 80);
- if (l > 80)
- {
- errmsg = scm_make_string (SCM_MAKINUM (l), SCM_UNDEFINED);
- regerror (regerrno, rx, SCM_CHARS (errmsg), l);
- }
- SCM_ALLOW_INTS;
- return SCM_CHARS (errmsg);
- }
- SCM_DEFINE (scm_regexp_p, "regexp?", 1, 0, 0,
- (SCM x),
- "Return @code{#t} if @var{obj} is a compiled regular expression, or\n"
- "@code{#f} otherwise.")
- #define FUNC_NAME s_scm_regexp_p
- {
- return SCM_BOOL(SCM_RGXP (x));
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_make_regexp, "make-regexp", 1, 0, 1,
- (SCM pat, SCM flags),
- "Compile the regular expression described by @var{str}, and return the\n"
- "compiled regexp structure. If @var{str} does not describe a legal\n"
- "regular expression, @code{make-regexp} throws a\n"
- "@code{regular-expression-syntax} error.\n\n"
- "The @var{flag} arguments change the behavior of the compiled regexp.\n"
- "The following flags may be supplied:\n\n"
- "@table @code\n"
- "@item regexp/icase\n"
- "Consider uppercase and lowercase letters to be the same when matching.\n\n"
- "@item regexp/newline\n"
- "If a newline appears in the target string, then permit the @samp{^} and\n"
- "@samp{$} operators to match immediately after or immediately before the\n"
- "newline, respectively. Also, the @samp{.} and @samp{[^...]} operators\n"
- "will never match a newline character. The intent of this flag is to\n"
- "treat the target string as a buffer containing many lines of text, and\n"
- "the regular expression as a pattern that may match a single one of those\n"
- "lines.\n\n"
- "@item regexp/basic\n"
- "Compile a basic (``obsolete'') regexp instead of the extended\n"
- "(``modern'') regexps that are the default. Basic regexps do not\n"
- "consider @samp{|}, @samp{+} or @samp{?} to be special characters, and\n"
- "require the @samp{@{...@}} and @samp{(...)} metacharacters to be\n"
- "backslash-escaped (@pxref{Backslash Escapes}). There are several other\n"
- "differences between basic and extended regular expressions, but these\n"
- "are the most significant.\n\n"
- "@item regexp/extended\n"
- "Compile an extended regular expression rather than a basic regexp. This\n"
- "is the default behavior; this flag will not usually be needed. If a\n"
- "call to @code{make-regexp} includes both @code{regexp/basic} and\n"
- "@code{regexp/extended} flags, the one which comes last will override\n"
- "the earlier one.\n"
- "@end table\n")
- #define FUNC_NAME s_scm_make_regexp
- {
- SCM flag;
- regex_t *rx;
- int status, cflags;
- SCM_VALIDATE_ROSTRING (1,pat);
- SCM_VALIDATE_REST_ARGUMENT (flags);
- SCM_COERCE_SUBSTR (pat);
- /* Examine list of regexp flags. If REG_BASIC is supplied, then
- turn off REG_EXTENDED flag (on by default). */
- cflags = REG_EXTENDED;
- flag = flags;
- while (!SCM_NULLP (flag))
- {
- if (SCM_INUM (SCM_CAR (flag)) == REG_BASIC)
- cflags &= ~REG_EXTENDED;
- else
- cflags |= SCM_INUM (SCM_CAR (flag));
- flag = SCM_CDR (flag);
- }
-
- rx = SCM_MUST_MALLOC_TYPE (regex_t);
- status = regcomp (rx, SCM_ROCHARS (pat),
- /* Make sure they're not passing REG_NOSUB;
- regexp-exec assumes we're getting match data. */
- cflags & ~REG_NOSUB);
- if (status != 0)
- {
- scm_error (scm_regexp_error_key,
- FUNC_NAME,
- scm_regexp_error_msg (status, rx),
- SCM_BOOL_F,
- SCM_BOOL_F);
- /* never returns */
- }
- SCM_RETURN_NEWSMOB (scm_tc16_regex, rx);
- }
- #undef FUNC_NAME
- SCM_DEFINE (scm_regexp_exec, "regexp-exec", 2, 2, 0,
- (SCM rx, SCM str, SCM start, SCM flags),
- "Match the compiled regular expression @var{regexp} against @code{str}.\n"
- "If the optional integer @var{start} argument is provided, begin matching\n"
- "from that position in the string. Return a match structure describing\n"
- "the results of the match, or @code{#f} if no match could be found.")
- #define FUNC_NAME s_scm_regexp_exec
- {
- int status, nmatches, offset;
- regmatch_t *matches;
- SCM mvec = SCM_BOOL_F;
- SCM_VALIDATE_RGXP (1,rx);
- SCM_VALIDATE_ROSTRING (2,str);
- SCM_VALIDATE_INUM_DEF_COPY (3,start,0,offset);
- SCM_ASSERT_RANGE (3,start,offset >= 0 && (unsigned) offset <= SCM_LENGTH (str));
- if (SCM_UNBNDP (flags))
- flags = SCM_INUM0;
- SCM_VALIDATE_INUM (4,flags);
- SCM_COERCE_SUBSTR (str);
- /* re_nsub doesn't account for the `subexpression' representing the
- whole regexp, so add 1 to nmatches. */
- nmatches = SCM_RGX(rx)->re_nsub + 1;
- SCM_DEFER_INTS;
- matches = SCM_MUST_MALLOC_TYPE_NUM (regmatch_t,nmatches);
- status = regexec (SCM_RGX (rx), SCM_ROCHARS (str) + offset,
- nmatches, matches,
- SCM_INUM (flags));
- if (!status)
- {
- int i;
- /* The match vector must include a cell for the string that was matched,
- so add 1. */
- mvec = scm_make_vector (SCM_MAKINUM (nmatches + 1), SCM_UNSPECIFIED);
- SCM_VELTS(mvec)[0] = str;
- for (i = 0; i < nmatches; ++i)
- if (matches[i].rm_so == -1)
- SCM_VELTS(mvec)[i+1] = scm_cons (SCM_MAKINUM (-1), SCM_MAKINUM (-1));
- else
- SCM_VELTS(mvec)[i+1]
- = scm_cons(SCM_MAKINUM(matches[i].rm_so + offset),
- SCM_MAKINUM(matches[i].rm_eo + offset));
- }
- scm_must_free ((char *) matches);
- SCM_ALLOW_INTS;
- if (status != 0 && status != REG_NOMATCH)
- scm_error (scm_regexp_error_key,
- FUNC_NAME,
- scm_regexp_error_msg (status, SCM_RGX (rx)),
- SCM_BOOL_F,
- SCM_BOOL_F);
- return mvec;
- }
- #undef FUNC_NAME
- void
- scm_init_regex_posix ()
- {
- scm_tc16_regex = scm_make_smob_type_mfpe ("regexp", sizeof (regex_t),
- NULL, free_regex, NULL, NULL);
- /* Compilation flags. */
- scm_sysintern ("regexp/basic", scm_long2num (REG_BASIC));
- scm_sysintern ("regexp/extended", scm_long2num (REG_EXTENDED));
- scm_sysintern ("regexp/icase", scm_long2num (REG_ICASE));
- scm_sysintern ("regexp/newline", scm_long2num (REG_NEWLINE));
- /* Execution flags. */
- scm_sysintern ("regexp/notbol", scm_long2num (REG_NOTBOL));
- scm_sysintern ("regexp/noteol", scm_long2num (REG_NOTEOL));
- #include "libguile/regex-posix.x"
- scm_add_feature ("regex");
- }
- /*
- Local Variables:
- c-file-style: "gnu"
- End:
- */
|