46_grep.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. /*
  2. * The information in this document is subject to change
  3. * without notice and should not be construed as a commitment
  4. * by Digital Equipment Corporation or by DECUS.
  5. *
  6. * Neither Digital Equipment Corporation, DECUS, nor the authors
  7. * assume any responsibility for the use or reliability of this
  8. * document or the described software.
  9. *
  10. * Copyright (C) 1980, DECUS
  11. *
  12. * General permission to copy or modify, but not for profit, is
  13. * hereby granted, provided that the above copyright notice is
  14. * included and reference made to the fact that reproduction
  15. * privileges were granted by DECUS.
  16. */
  #include <ctype.h> // tolower()
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h> // strlen()
  20. /*
  21. * grep
  22. *
  23. * Runs on the Decus compiler or on vms, On vms, define as:
  24. * grep :== "$disk:[account]grep" (native)
  25. * grep :== "$disk:[account]grep grep" (Decus)
  26. * See below for more information.
  27. */
  /*
   * Usage text: command syntax and the meaning of each flag.  main()
   * passes this table to help(), which prints one entry per output
   * line and stops at the terminating null pointer.
   */
  char *documentation[] = {
      /* Invocation */
      "grep searches a file for a given pattern. Execute by",
      " grep [flags] regular_expression file_list\n",

      /* Flags */
      "Flags are single characters preceded by '-':",
      " -c Only a count of matching lines is printed",
      " -f Print file name for matching lines switch, see below",
      " -n Each line is preceded by its line number",
      " -v Only print non-matching lines\n",

      /* File list and the -f switch */
      "The file_list is a list of files (wildcards are acceptable on RSX modes).",
      "\nThe file name is normally printed if there is a file given.",
      "The -f flag reverses this action (print name no file, not if more).\n",

      NULL
  };
  /*
   * Description of the regular-expression syntax.  main() prints it
   * (through help()) after the usage text when the program is invoked
   * as "grep ?".  One entry per output line, null-pointer terminated.
   */
  char *patdoc[] = {
      /* General remarks */
      "The regular_expression defines the pattern to search for. Upper- and",
      "lower-case are always ignored. Blank lines never match. The expression",
      "should be quoted to prevent file-name translation.",

      /* Single-character items and anchors */
      "x An ordinary character (not mentioned below) matches that character.",
      "'\\' The backslash quotes any character. \"\\$\" matches a dollar-sign.",
      "'^' A circumflex at the beginning of an expression matches the",
      " beginning of a line.",
      "'$' A dollar-sign at the end of an expression matches the end of a line.",
      "'.' A period matches any character except \"new-line\".",

      /* Colon classes */
      "':a' A colon matches a class of characters described by the following",
      "':d' character. \":a\" matches any alphabetic, \":d\" matches digits,",
      "':n' \":n\" matches alphanumerics, \": \" matches spaces, tabs, and",
      "': ' other control characters, such as new-line.",

      /* Occurrence operators */
      "'*' An expression followed by an asterisk matches zero or more",
      " occurrences of that expression: \"fo*\" matches \"f\", \"fo\"",
      " \"foo\", etc.",
      "'+' An expression followed by a plus sign matches one or more",
      " occurrences of that expression: \"fo+\" matches \"fo\", etc.",
      "'-' An expression followed by a minus sign optionally matches",
      " the expression.",

      /* Bracketed classes */
      "'[]' A string enclosed in square brackets matches any character in",
      " that string, but no others. If the first character in the",
      " string is a circumflex, the expression matches any character",
      " except \"new-line\" and the characters in the string. For",
      " example, \"[xyz]\" matches \"xx\" and \"zyx\", while \"[^xyz]\"",
      " matches \"abc\" but not \"axb\". A range of characters may be",
      " specified by two characters separated by \"-\". Note that,",
      " [a-z] matches alphabetics, while [z-a] never matches.",

      "The concatenation of regular expressions is a regular expression.",

      NULL
  };
  /* Buffer capacities. */
  enum {
      LMAX = 512,     /* Size of the input line buffer lbuf[] */
      PMAX = 256      /* Size of the compiled pattern buffer pbuf[] */
  };

  /*
   * Opcodes of the compiled pattern that compile() and cclass() write
   * into pbuf[] and that pmatch() interprets.
   */
  enum {
      CHAR   = 1,     /* Literal; followed by the (lower-cased) character */
      BOL    = 2,     /* '^'  beginning of line */
      EOL    = 3,     /* '$'  end of line */
      ANY    = 4,     /* '.'  any character */
      CLASS  = 5,     /* '[...]'  followed by a count byte and the members */
      NCLASS = 6,     /* '[^...]' same layout as CLASS, sense inverted */
      STAR   = 7,     /* '*'  prefix; operand item follows, then ENDPAT */
      PLUS   = 8,     /* '+'  prefix; operand item follows, then ENDPAT */
      MINUS  = 9,     /* '-'  prefix; operand item follows, then ENDPAT */
      ALPHA  = 10,    /* ':a' alphabetic */
      DIGIT  = 11,    /* ':d' digit */
      NALPHA = 12,    /* ':n' alphanumeric */
      PUNCT  = 13,    /* ': ' space and control characters */
      RANGE  = 14,    /* Inside a class; followed by low and high bounds */
      ENDPAT = 15     /* End of the pattern or of a closure operand */
  };

  /* Option counters set by main() while parsing the command line. */
  int cflag = 0;      /* -c: print only a count of selected lines */
  int fflag = 0;      /* -f: file-name heading switch */
  int nflag = 0;      /* -n: prefix each line with its line number */
  int vflag = 0;      /* -v: select the lines that do NOT match */
  int nfile = 0;      /* Number of file arguments */
  int debug = 0;      /* -d: debug level */

  char *pp;           /* Next free byte in pbuf[] while compiling */
  char lbuf[LMAX];    /* Current input line */
  char pbuf[PMAX];    /* Compiled pattern */

  /*
   * Forward declarations.  All of them are full prototypes so that
   * every call is checked against the definition's parameter list.
   */
  char *cclass(char *source, char *src);
  char *pmatch(char *line, char *pattern);
  void store(int op);
  void error(char *s);
  void badpat(char *message, char *source, char *stop);
  int match(void);
  /*** Display a file name *******************************/
  /*
   * file -- write the heading "File <s>:" followed by a newline to the
   * standard output.
   *
   *   s   file name to show
   */
  void file(char *s)
  {
      fprintf(stdout, "File %s:\n", s);
  }
  /*** Report unopenable file ****************************/
  /*
   * cant -- tell the user, on the standard error stream, that the
   * file named s could not be opened.  Returns normally, so the
   * caller decides how to carry on.
   *
   *   s   name of the file that failed to open
   */
  void cant(char *s)
  {
      (void) fprintf(stderr, "%s: cannot open\n", s);
  }
  /*** Give good help ************************************/
  /*
   * help -- print a table of text lines on the standard output, one
   * entry per line.
   *
   *   hp  array of strings terminated by a null pointer
   */
  void help(char **hp)
  {
      int i = 0;

      while (hp[i] != NULL) {
          puts(hp[i]);        /* puts() supplies the trailing newline */
          ++i;
      }
  }
  /*** Display usage summary *****************************/
  /*
   * usage -- report a command-line error and terminate.
   *
   * Writes the message s (prefixed with "?GREP-E-") and a one-line
   * usage summary to the standard error stream, then exits with
   * status 1.  Does not return.
   *
   *   s   short description of what was wrong
   */
  void usage(char *s)
  {
      fprintf(stderr, "?GREP-E-%s\n", s);
      fputs("Usage: grep [-cfnv] pattern [file ...]. grep ? for help\n",
            stderr);
      exit(1);
  }
  121. /*** Compile the pattern into global pbuf[] ************/
  122. void compile(char *source)
  123. {
  124. char *s; /* Source string pointer */
  125. char *lp; /* Last pattern pointer */
  126. int c; /* Current character */
  127. int o; /* Temp */
  128. char *spp; /* Save beginning of pattern */
  129. s = source;
  130. if (debug)
  131. printf("Pattern = \"%s\"\n", s);
  132. pp = pbuf;
  133. while (c = *s++) {
  134. /*
  135. * STAR, PLUS and MINUS are special.
  136. */
  137. if (c == '*' || c == '+' || c == '-') {
  138. if (pp == pbuf ||
  139. (o=pp[-1]) == BOL ||
  140. o == EOL ||
  141. o == STAR ||
  142. o == PLUS ||
  143. o == MINUS)
  144. badpat("Illegal occurrence op.", source, s);
  145. store(ENDPAT);
  146. store(ENDPAT);
  147. spp = pp; /* Save pattern end */
  148. while (--pp > lp) /* Move pattern down */
  149. *pp = pp[-1]; /* one byte */
  150. *pp = (c == '*') ? STAR :
  151. (c == '-') ? MINUS : PLUS;
  152. pp = spp; /* Restore pattern end */
  153. continue;
  154. }
  155. /*
  156. * All the rest.
  157. */
  158. lp = pp; /* Remember start */
  159. switch(c) {
  160. case '^':
  161. store(BOL);
  162. break;
  163. case '$':
  164. store(EOL);
  165. break;
  166. case '.':
  167. store(ANY);
  168. break;
  169. case '[':
  170. s = cclass(source, s);
  171. break;
  172. case ':':
  173. if (*s) {
  174. switch(tolower(c = *s++)) {
  175. case 'a':
  176. case 'A':
  177. store(ALPHA);
  178. break;
  179. case 'd':
  180. case 'D':
  181. store(DIGIT);
  182. break;
  183. case 'n':
  184. case 'N':
  185. store(NALPHA);
  186. break;
  187. case ' ':
  188. store(PUNCT);
  189. break;
  190. default:
  191. badpat("Unknown : type", source, s);
  192. }
  193. break;
  194. }
  195. else badpat("No : type", source, s);
  196. case '\\':
  197. if (*s)
  198. c = *s++;
  199. default:
  200. store(CHAR);
  201. store(tolower(c));
  202. }
  203. }
  204. store(ENDPAT);
  205. store(0); /* Terminate string */
  206. if (debug) {
  207. for (lp = pbuf; lp < pp;) {
  208. if ((c = (*lp++ & 0377)) < ' ')
  209. printf("\\%o ", c);
  210. else printf("%c ", c);
  211. }
  212. printf("\n");
  213. }
  214. }
  215. /*** Compile a class (within []) ***********************/
  216. char *cclass(char *source, char *src)
  217. /* char *source; // Pattern start -- for error msg. */
  218. /* char *src; // Class start */
  219. {
  220. char *s; /* Source pointer */
  221. char *cp; /* Pattern start */
  222. int c; /* Current character */
  223. int o; /* Temp */
  224. s = src;
  225. o = CLASS;
  226. if (*s == '^') {
  227. ++s;
  228. o = NCLASS;
  229. }
  230. store(o);
  231. cp = pp;
  232. store(0); /* Byte count */
  233. while ((c = *s++) && c!=']') {
  234. if (c == '\\') { /* Store quoted char */
  235. if ((c = *s++) == '\0') /* Gotta get something */
  236. badpat("Class terminates badly", source, s);
  237. else store(tolower(c));
  238. }
  239. else if (c == '-' &&
  240. (pp - cp) > 1 && *s != ']' && *s != '\0') {
  241. c = pp[-1]; /* Range start */
  242. pp[-1] = RANGE; /* Range signal */
  243. store(c); /* Re-store start */
  244. c = *s++; /* Get end char and*/
  245. store(tolower(c)); /* Store it */
  246. }
  247. else {
  248. store(tolower(c)); /* Store normal char */
  249. }
  250. }
  251. if (c != ']')
  252. badpat("Unterminated class", source, s);
  253. if ((c = (pp - cp)) >= 256)
  254. badpat("Class too large", source, s);
  255. if (c == 0)
  256. badpat("Empty class", source, s);
  257. *cp = c;
  258. return(s);
  259. }
  260. /*** Store an entry in the pattern buffer **************/
  261. void store(int op)
  262. {
  263. if (pp >= &pbuf[PMAX])
  264. error("Pattern too complex\n");
  265. *pp++ = op;
  266. }
  /*** Report a bad pattern specification ****************/
  /*
   * badpat -- describe a syntax error in the pattern on the standard
   * error stream, then give up through error().
   *
   *   message  what is wrong
   *   source   pattern start
   *   stop     one past the character at which compilation stopped;
   *            stop[-1] is printed, so stop must be beyond source
   *
   * The byte offset is a pointer difference (ptrdiff_t); it is
   * converted to long explicitly so that it matches the %ld
   * conversion on every platform.
   */
  void badpat(char *message, char *source, char *stop)
  {
      long offset = (long)(stop - source);

      fprintf(stderr, "-GREP-E-%s, pattern is\"%s\"\n", message, source);
      fprintf(stderr, "-GREP-E-Stopped at byte %ld, '%c'\n",
              offset, stop[-1]);
      error("?GREP-E-Bad pattern\n");
  }
  278. /*** Scan the file for the pattern in pbuf[] ***********/
  279. void grep(FILE *fp, char *fn)
  280. /* FILE *fp; // File to process */
  281. /* char *fn; // File name (for -f option) */
  282. {
  283. int lno, count, m;
  284. lno = 0;
  285. count = 0;
  286. while (fgets(lbuf, LMAX, fp)) {
  287. ++lno;
  288. m = match();
  289. if ((m && !vflag) || (!m && vflag)) {
  290. ++count;
  291. if (!cflag) {
  292. if (fflag && fn) {
  293. file(fn);
  294. fn = 0;
  295. }
  296. if (nflag)
  297. printf("%d\t", lno);
  298. printf("%s\n", lbuf);
  299. }
  300. }
  301. }
  302. if (cflag) {
  303. if (fflag && fn)
  304. file(fn);
  305. printf("%d\n", count);
  306. }
  307. }
  308. /*** Match line (lbuf) with pattern (pbuf) return 1 if match ***/
  309. int match()
  310. {
  311. char *l; /* Line pointer */
  312. for (l = lbuf; *l; ++l) {
  313. if (pmatch(l, pbuf))
  314. return(1);
  315. }
  316. return(0);
  317. }
  318. /*** Match partial line with pattern *******************/
  319. char *pmatch(char *line, char *pattern)
  320. /* char *line; // (partial) line to match */
  321. /* char *pattern; // (partial) pattern to match */
  322. {
  323. char *l; /* Current line pointer */
  324. char *p; /* Current pattern pointer */
  325. char c; /* Current character */
  326. char *e; /* End for STAR and PLUS match */
  327. int op; /* Pattern operation */
  328. int n; /* Class counter */
  329. char *are; /* Start of STAR match */
  330. l = line;
  331. if (debug > 1)
  332. printf("pmatch(\"%s\")\n", line);
  333. p = pattern;
  334. while ((op = *p++) != ENDPAT) {
  335. if (debug > 1)
  336. printf("byte[%ld] = 0%o, '%c', op = 0%o\n",
  337. l-line, *l, *l, op);
  338. switch(op) {
  339. case CHAR:
  340. if (tolower(*l++) != *p++)
  341. return(0);
  342. break;
  343. case BOL:
  344. if (l != lbuf)
  345. return(0);
  346. break;
  347. case EOL:
  348. if (*l != '\0')
  349. return(0);
  350. break;
  351. case ANY:
  352. if (*l++ == '\0')
  353. return(0);
  354. break;
  355. case DIGIT:
  356. if ((c = *l++) < '0' || (c > '9'))
  357. return(0);
  358. break;
  359. case ALPHA:
  360. c = tolower(*l++);
  361. if (c < 'a' || c > 'z')
  362. return(0);
  363. break;
  364. case NALPHA:
  365. c = tolower(*l++);
  366. if (c >= 'a' && c <= 'z')
  367. break;
  368. else if (c < '0' || c > '9')
  369. return(0);
  370. break;
  371. case PUNCT:
  372. c = *l++;
  373. if (c == 0 || c > ' ')
  374. return(0);
  375. break;
  376. case CLASS:
  377. case NCLASS:
  378. c = tolower(*l++);
  379. n = *p++ & 0377;
  380. do {
  381. if (*p == RANGE) {
  382. p += 3;
  383. n -= 2;
  384. if (c >= p[-2] && c <= p[-1])
  385. break;
  386. }
  387. else if (c == *p++)
  388. break;
  389. } while (--n > 1);
  390. if ((op == CLASS) == (n <= 1))
  391. return(0);
  392. if (op == CLASS)
  393. p += n - 2;
  394. break;
  395. case MINUS:
  396. e = pmatch(l, p); /* Look for a match */
  397. while (*p++ != ENDPAT); /* Skip over pattern */
  398. if (e) /* Got a match? */
  399. l = e; /* Yes, update string */
  400. break; /* Always succeeds */
  401. case PLUS: /* One or more ... */
  402. if ((l = pmatch(l, p)) == 0)
  403. return(0); /* Gotta have a match */
  404. case STAR: /* Zero or more ... */
  405. are = l; /* Remember line start */
  406. while (*l && (e = pmatch(l, p)))
  407. l = e; /* Get longest match */
  408. while (*p++ != ENDPAT); /* Skip over pattern */
  409. while (l >= are) { /* Try to match rest */
  410. if (e = pmatch(l, p))
  411. return(e);
  412. --l; /* Nope, try earlier */
  413. }
  414. return(0); /* Nothing else worked */
  415. default:
  416. printf("Bad op code %d\n", op);
  417. error("Cannot happen -- match\n");
  418. }
  419. }
  420. return(l);
  421. }
  /*** Report an error ***********************************/
  /*
   * error -- write the message s, exactly as given, to the standard
   * error stream and terminate the program with exit status 1.
   * Does not return.
   *
   *   s   complete message text, including any trailing newline
   */
  void error(char *s)
  {
      fputs(s, stderr);
      exit(1);
  }
  428. /*** Main program - parse arguments & grep *************/
  429. int main(int argc, char **argv)
  430. {
  431. char *p;
  432. int c, i;
  433. int gotpattern;
  434. FILE *f;
  435. if (argc <= 1)
  436. usage("No arguments");
  437. if (argc == 2 && argv[1][0] == '?' && argv[1][1] == 0) {
  438. help(documentation);
  439. help(patdoc);
  440. return 0;
  441. }
  442. nfile = argc-1;
  443. gotpattern = 0;
  444. for (i=1; i < argc; ++i) {
  445. p = argv[i];
  446. if (*p == '-') {
  447. ++p;
  448. while (c = *p++) {
  449. switch(tolower(c)) {
  450. case '?':
  451. help(documentation);
  452. break;
  453. case 'C':
  454. case 'c':
  455. ++cflag;
  456. break;
  457. case 'D':
  458. case 'd':
  459. ++debug;
  460. break;
  461. case 'F':
  462. case 'f':
  463. ++fflag;
  464. break;
  465. case 'n':
  466. case 'N':
  467. ++nflag;
  468. break;
  469. case 'v':
  470. case 'V':
  471. ++vflag;
  472. break;
  473. default:
  474. usage("Unknown flag");
  475. }
  476. }
  477. argv[i] = 0;
  478. --nfile;
  479. } else if (!gotpattern) {
  480. compile(p);
  481. argv[i] = 0;
  482. ++gotpattern;
  483. --nfile;
  484. }
  485. }
  486. if (!gotpattern)
  487. usage("No pattern");
  488. if (nfile == 0)
  489. grep(stdin, 0);
  490. else {
  491. fflag = fflag ^ (nfile > 0);
  492. for (i=1; i < argc; ++i) {
  493. if (p = argv[i]) {
  494. if ((f=fopen(p, "r")) == NULL)
  495. cant(p);
  496. else {
  497. grep(f, p);
  498. fclose(f);
  499. }
  500. }
  501. }
  502. }
  503. return 0;
  504. }
  505. /* vim: set expandtab ts=4 sw=3 sts=3 tw=80 :*/