123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569 |
- /*
- * The information in this document is subject to change
- * without notice and should not be construed as a commitment
- * by Digital Equipment Corporation or by DECUS.
- *
- * Neither Digital Equipment Corporation, DECUS, nor the authors
- * assume any responsibility for the use or reliability of this
- * document or the described software.
- *
- * Copyright (C) 1980, DECUS
- *
- * General permission to copy or modify, but not for profit, is
- * hereby granted, provided that the above copyright notice is
- * included and reference made to the fact that reproduction
- * privileges were granted by DECUS.
- */
#include <ctype.h> // tolower()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
- /*
- * grep
- *
- * Runs on the Decus compiler or on vms, On vms, define as:
- * grep :== "$disk:[account]grep" (native)
- * grep :== "$disk:[account]grep grep" (Decus)
- * See below for more information.
- */
/*
 * General help text: one output line per entry, terminated by a null
 * pointer so that help() knows where to stop.
 */
char *documentation[] = {
    "grep searches a file for a given pattern. Execute by",
    " grep [flags] regular_expression file_list\n",
    "Flags are single characters preceded by '-':",
    " -c Only a count of matching lines is printed",
    " -f Print file name for matching lines switch, see below",
    " -n Each line is preceded by its line number",
    " -v Only print non-matching lines\n",
    "The file_list is a list of files (wildcards are acceptable on RSX modes).",
    "\nThe file name is normally printed if there is a file given.",
    "The -f flag reverses this action (print name no file, not if more).\n",
    NULL
};
/*
 * Help text describing the regular-expression syntax: one output line
 * per entry, terminated by a null pointer.
 */
char *patdoc[] = {
    "The regular_expression defines the pattern to search for. Upper- and",
    "lower-case are always ignored. Blank lines never match. The expression",
    "should be quoted to prevent file-name translation.",
    "x An ordinary character (not mentioned below) matches that character.",
    "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
    "'^' A circumflex at the beginning of an expression matches the",
    " beginning of a line.",
    "'$' A dollar-sign at the end of an expression matches the end of a line.",
    "'.' A period matches any character except \"new-line\".",
    "':a' A colon matches a class of characters described by the following",
    "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
    "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
    "': ' other control characters, such as new-line.",
    "'*' An expression followed by an asterisk matches zero or more",
    " occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
    " \"foo\", etc.",
    "'+' An expression followed by a plus sign matches one or more",
    " occurrences of that expression: \"fo+\" matches \"fo\", etc.",
    "'-' An expression followed by a minus sign optionally matches",
    " the expression.",
    "'[]' A string enclosed in square brackets matches any character in",
    " that string, but no others. If the first character in the",
    " string is a circumflex, the expression matches any character",
    " except \"new-line\" and the characters in the string. For",
    " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
    " matches \"abc\" but not \"axb\". A range of characters may be",
    " specified by two characters separated by \"-\". Note that,",
    " [a-z] matches alphabetics, while [z-a] never matches.",
    "The concatenation of regular expressions is a regular expression.",
    NULL
};
/* Buffer capacities. */
enum {
    LMAX = 512,     /* Size of the input line buffer lbuf[]       */
    PMAX = 256      /* Size of the compiled pattern buffer pbuf[] */
};

/* Opcodes stored in the compiled pattern. */
enum {
    CHAR   = 1,     /* Literal character; the character follows   */
    BOL    = 2,     /* '^'  beginning of line                     */
    EOL    = 3,     /* '$'  end of line                           */
    ANY    = 4,     /* '.'  any character                         */
    CLASS  = 5,     /* '[...]'  count byte and members follow     */
    NCLASS = 6,     /* '[^...]' count byte and members follow     */
    STAR   = 7,     /* '*'  zero or more of the operand           */
    PLUS   = 8,     /* '+'  one or more of the operand            */
    MINUS  = 9,     /* '-'  optional operand                      */
    ALPHA  = 10,    /* ':a' alphabetic                            */
    DIGIT  = 11,    /* ':d' digit                                 */
    NALPHA = 12,    /* ':n' alphanumeric                          */
    PUNCT  = 13,    /* ': ' space, tab or other control character */
    RANGE  = 14,    /* Range marker inside a class                */
    ENDPAT = 15     /* End of a pattern or of an operand          */
};
- int cflag=0, fflag=0, nflag=0, vflag=0, nfile=0, debug=0;
- char *pp, lbuf[LMAX], pbuf[PMAX];
- char *cclass();
- char *pmatch();
- void store(int);
- void error(char *);
- void badpat(char *, char *, char *);
- int match(void);
/*** Display a file name *******************************/
/*
 * Write the heading "File <s>:" on standard output.
 *   s   Name of the file being reported.
 */
void file(char *s)
{
    fprintf(stdout, "File %s:\n", s);
}
/*** Report unopenable file ****************************/
/*
 * Tell the user, on standard error, that a file could not be opened.
 * The program keeps running; the caller decides what to do next.
 *   s   Name of the file that could not be opened.
 */
void cant(char *s)
{
    (void) fprintf(stderr, "%s: cannot open\n", s);
}
/*** Give good help ************************************/
/*
 * Print a help text on standard output, one entry per line.
 *   hp  Array of strings terminated by a null pointer.
 */
void help(char **hp)
{
    int i = 0;

    while (hp[i] != NULL) {
        printf("%s\n", hp[i]);
        ++i;
    }
}
/*** Display usage summary *****************************/
/*
 * Report a command-line error followed by a one-line usage summary on
 * standard error, then terminate the program with exit status 1.
 *   s   Short description of what was wrong.
 */
void usage(char *s)
{
    fprintf(stderr, "?GREP-E-%s\n", s);
    fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
          stderr);
    exit(1);
}
- /*** Compile the pattern into global pbuf[] ************/
- void compile(char *source)
- {
- char *s; /* Source string pointer */
- char *lp; /* Last pattern pointer */
- int c; /* Current character */
- int o; /* Temp */
- char *spp; /* Save beginning of pattern */
- s = source;
- if (debug)
- printf("Pattern = \"%s\"\n", s);
- pp = pbuf;
- while (c = *s++) {
- /*
- * STAR, PLUS and MINUS are special.
- */
- if (c == '*' || c == '+' || c == '-') {
- if (pp == pbuf ||
- (o=pp[-1]) == BOL ||
- o == EOL ||
- o == STAR ||
- o == PLUS ||
- o == MINUS)
- badpat("Illegal occurrence op.", source, s);
- store(ENDPAT);
- store(ENDPAT);
- spp = pp; /* Save pattern end */
- while (--pp > lp) /* Move pattern down */
- *pp = pp[-1]; /* one byte */
- *pp = (c == '*') ? STAR :
- (c == '-') ? MINUS : PLUS;
- pp = spp; /* Restore pattern end */
- continue;
- }
- /*
- * All the rest.
- */
- lp = pp; /* Remember start */
- switch(c) {
- case '^':
- store(BOL);
- break;
- case '$':
- store(EOL);
- break;
- case '.':
- store(ANY);
- break;
- case '[':
- s = cclass(source, s);
- break;
- case ':':
- if (*s) {
- switch(tolower(c = *s++)) {
- case 'a':
- case 'A':
- store(ALPHA);
- break;
- case 'd':
- case 'D':
- store(DIGIT);
- break;
- case 'n':
- case 'N':
- store(NALPHA);
- break;
- case ' ':
- store(PUNCT);
- break;
- default:
- badpat("Unknown : type", source, s);
- }
- break;
- }
- else badpat("No : type", source, s);
- case '\\':
- if (*s)
- c = *s++;
- default:
- store(CHAR);
- store(tolower(c));
- }
- }
- store(ENDPAT);
- store(0); /* Terminate string */
- if (debug) {
- for (lp = pbuf; lp < pp;) {
- if ((c = (*lp++ & 0377)) < ' ')
- printf("\\%o ", c);
- else printf("%c ", c);
- }
- printf("\n");
- }
- }
- /*** Compile a class (within []) ***********************/
- char *cclass(char *source, char *src)
- /* char *source; // Pattern start -- for error msg. */
- /* char *src; // Class start */
- {
- char *s; /* Source pointer */
- char *cp; /* Pattern start */
- int c; /* Current character */
- int o; /* Temp */
- s = src;
- o = CLASS;
- if (*s == '^') {
- ++s;
- o = NCLASS;
- }
- store(o);
- cp = pp;
- store(0); /* Byte count */
- while ((c = *s++) && c!=']') {
- if (c == '\\') { /* Store quoted char */
- if ((c = *s++) == '\0') /* Gotta get something */
- badpat("Class terminates badly", source, s);
- else store(tolower(c));
- }
- else if (c == '-' &&
- (pp - cp) > 1 && *s != ']' && *s != '\0') {
- c = pp[-1]; /* Range start */
- pp[-1] = RANGE; /* Range signal */
- store(c); /* Re-store start */
- c = *s++; /* Get end char and*/
- store(tolower(c)); /* Store it */
- }
- else {
- store(tolower(c)); /* Store normal char */
- }
- }
- if (c != ']')
- badpat("Unterminated class", source, s);
- if ((c = (pp - cp)) >= 256)
- badpat("Class too large", source, s);
- if (c == 0)
- badpat("Empty class", source, s);
- *cp = c;
- return(s);
- }
- /*** Store an entry in the pattern buffer **************/
- void store(int op)
- {
- if (pp >= &pbuf[PMAX])
- error("Pattern too complex\n");
- *pp++ = op;
- }
/*** Report a bad pattern specification ****************/
/*
 * Print a diagnostic for a malformed pattern on standard error and
 * finish through error().
 *   message  Error message.
 *   source   Pattern start.
 *   stop     Position just past the offending character; stop[-1] is
 *            echoed in the diagnostic.
 */
void badpat(char *message, char *source, char *stop)
{
    fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
    /*
     * A pointer difference has type ptrdiff_t, which need not be long;
     * convert it explicitly so the argument matches "%ld".
     */
    fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
            (long)(stop - source), stop[-1]);
    error("?GREP-E-Bad pattern\n");
}
- /*** Scan the file for the pattern in pbuf[] ***********/
- void grep(FILE *fp, char *fn)
- /* FILE *fp; // File to process */
- /* char *fn; // File name (for -f option) */
- {
- int lno, count, m;
- lno = 0;
- count = 0;
- while (fgets(lbuf, LMAX, fp)) {
- ++lno;
- m = match();
- if ((m && !vflag) || (!m && vflag)) {
- ++count;
- if (!cflag) {
- if (fflag && fn) {
- file(fn);
- fn = 0;
- }
- if (nflag)
- printf("%d\t", lno);
- printf("%s\n", lbuf);
- }
- }
- }
- if (cflag) {
- if (fflag && fn)
- file(fn);
- printf("%d\n", count);
- }
- }
- /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
- int match()
- {
- char *l; /* Line pointer */
- for (l = lbuf; *l; ++l) {
- if (pmatch(l, pbuf))
- return(1);
- }
- return(0);
- }
- /*** Match partial line with pattern *******************/
- char *pmatch(char *line, char *pattern)
- /* char *line; // (partial) line to match */
- /* char *pattern; // (partial) pattern to match */
- {
- char *l; /* Current line pointer */
- char *p; /* Current pattern pointer */
- char c; /* Current character */
- char *e; /* End for STAR and PLUS match */
- int op; /* Pattern operation */
- int n; /* Class counter */
- char *are; /* Start of STAR match */
- l = line;
- if (debug > 1)
- printf("pmatch(\"%s\")\n", line);
- p = pattern;
- while ((op = *p++) != ENDPAT) {
- if (debug > 1)
- printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
- l-line, *l, *l, op);
- switch(op) {
- case CHAR:
- if (tolower(*l++) != *p++)
- return(0);
- break;
- case BOL:
- if (l != lbuf)
- return(0);
- break;
- case EOL:
- if (*l != '\0')
- return(0);
- break;
- case ANY:
- if (*l++ == '\0')
- return(0);
- break;
- case DIGIT:
- if ((c = *l++) < '0' || (c > '9'))
- return(0);
- break;
- case ALPHA:
- c = tolower(*l++);
- if (c < 'a' || c > 'z')
- return(0);
- break;
- case NALPHA:
- c = tolower(*l++);
- if (c >= 'a' && c <= 'z')
- break;
- else if (c < '0' || c > '9')
- return(0);
- break;
- case PUNCT:
- c = *l++;
- if (c == 0 || c > ' ')
- return(0);
- break;
- case CLASS:
- case NCLASS:
- c = tolower(*l++);
- n = *p++ & 0377;
- do {
- if (*p == RANGE) {
- p += 3;
- n -= 2;
- if (c >= p[-2] && c <= p[-1])
- break;
- }
- else if (c == *p++)
- break;
- } while (--n > 1);
- if ((op == CLASS) == (n <= 1))
- return(0);
- if (op == CLASS)
- p += n - 2;
- break;
- case MINUS:
- e = pmatch(l, p); /* Look for a match */
- while (*p++ != ENDPAT); /* Skip over pattern */
- if (e) /* Got a match? */
- l = e; /* Yes, update string */
- break; /* Always succeeds */
- case PLUS: /* One or more ... */
- if ((l = pmatch(l, p)) == 0)
- return(0); /* Gotta have a match */
- case STAR: /* Zero or more ... */
- are = l; /* Remember line start */
- while (*l && (e = pmatch(l, p)))
- l = e; /* Get longest match */
- while (*p++ != ENDPAT); /* Skip over pattern */
- while (l >= are) { /* Try to match rest */
- if (e = pmatch(l, p))
- return(e);
- --l; /* Nope, try earlier */
- }
- return(0); /* Nothing else worked */
- default:
- printf("Bad op code %d\n", op);
- error("Cannot happen -- match\n");
- }
- }
- return(l);
- }
/*** Report an error ***********************************/
/*
 * Write the message to standard error exactly as given (no newline is
 * added) and terminate the program with exit status 1.
 *   s   Message text.
 */
void error(char *s)
{
    fputs(s, stderr);
    exit(1);
}
- /*** Main program - parse arguments & grep *************/
- int main(int argc, char **argv)
- {
- char *p;
- int c, i;
- int gotpattern;
- FILE *f;
- if (argc <= 1)
- usage("No arguments");
- if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
- help(documentation);
- help(patdoc);
- return 0;
- }
- nfile = argc-1;
- gotpattern = 0;
- for (i=1; i < argc; ++i) {
- p = argv[i];
- if (*p == '-') {
- ++p;
- while (c = *p++) {
- switch(tolower(c)) {
- case '?':
- help(documentation);
- break;
- case 'C':
- case 'c':
- ++cflag;
- break;
- case 'D':
- case 'd':
- ++debug;
- break;
- case 'F':
- case 'f':
- ++fflag;
- break;
- case 'n':
- case 'N':
- ++nflag;
- break;
- case 'v':
- case 'V':
- ++vflag;
- break;
- default:
- usage("Unknown flag");
- }
- }
- argv[i] = 0;
- --nfile;
- } else if (!gotpattern) {
- compile(p);
- argv[i] = 0;
- ++gotpattern;
- --nfile;
- }
- }
- if (!gotpattern)
- usage("No pattern");
- if (nfile == 0)
- grep(stdin, 0);
- else {
- fflag = fflag ^ (nfile > 0);
- for (i=1; i < argc; ++i) {
- if (p = argv[i]) {
- if ((f=fopen(p, "r")) == NULL)
- cant(p);
- else {
- grep(f, p);
- fclose(f);
- }
- }
- }
- }
- return 0;
- }
- /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/
|