Annotation of embedaddon/php/ext/ereg/regex/main.c, revision 1.1.1.1
1.1 misho 1: #include <stdio.h>
2: #include <string.h>
3: #include <sys/types.h>
4: #include <regex.h>
5: #include <assert.h>
6: #include <stdlib.h>
7:
8: #include "main.ih"
9:
/* Program-wide state shared by main(), regress() and try(). */
char *progname;			/* argv[0], recorded by main() for messages */
int debug = 0;			/* incremented once per -x option */
int line = 0;			/* current input line number during regression */
int status = 0;			/* process exit status; set to 1 on a failed check */

int copts = REG_EXTENDED;	/* baseline regcomp() flags */
int eopts = 0;			/* baseline regexec() flags */
regoff_t startoff = 0;		/* -S value: rm_so used with REG_STARTEND */
regoff_t endoff = 0;		/* -E value: subtracted from the string length for rm_eo */


/* Helpers defined in other translation units. */
extern int split();
extern void regprint();
23:
24: /*
25: - main - do the simple case, hand off to regress() for regression
26: */
27: int main(argc, argv)
28: int argc;
29: char *argv[];
30: {
31: regex_t re;
32: # define NS 10
33: regmatch_t subs[NS];
34: char erbuf[100];
35: int err;
36: size_t len;
37: int c;
38: int errflg = 0;
39: register int i;
40: extern int optind;
41: extern char *optarg;
42:
43: progname = argv[0];
44:
45: while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
46: switch (c) {
47: case 'c': /* compile options */
48: copts = options('c', optarg);
49: break;
50: case 'e': /* execute options */
51: eopts = options('e', optarg);
52: break;
53: case 'S': /* start offset */
54: startoff = (regoff_t)atoi(optarg);
55: break;
56: case 'E': /* end offset */
57: endoff = (regoff_t)atoi(optarg);
58: break;
59: case 'x': /* Debugging. */
60: debug++;
61: break;
62: case '?':
63: default:
64: errflg++;
65: break;
66: }
67: if (errflg) {
68: fprintf(stderr, "usage: %s ", progname);
69: fprintf(stderr, "[-c copt][-C][-d] [re]\n");
70: exit(2);
71: }
72:
73: if (optind >= argc) {
74: regress(stdin);
75: exit(status);
76: }
77:
78: err = regcomp(&re, argv[optind++], copts);
79: if (err) {
80: len = regerror(err, &re, erbuf, sizeof(erbuf));
81: fprintf(stderr, "error %s, %d/%d `%s'\n",
82: eprint(err), len, sizeof(erbuf), erbuf);
83: exit(status);
84: }
85: regprint(&re, stdout);
86:
87: if (optind >= argc) {
88: regfree(&re);
89: exit(status);
90: }
91:
92: if (eopts®_STARTEND) {
93: subs[0].rm_so = startoff;
94: subs[0].rm_eo = strlen(argv[optind]) - endoff;
95: }
96: err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
97: if (err) {
98: len = regerror(err, &re, erbuf, sizeof(erbuf));
99: fprintf(stderr, "error %s, %d/%d `%s'\n",
100: eprint(err), len, sizeof(erbuf), erbuf);
101: exit(status);
102: }
103: if (!(copts®_NOSUB)) {
104: len = (int)(subs[0].rm_eo - subs[0].rm_so);
105: if (subs[0].rm_so != -1) {
106: if (len != 0)
107: printf("match `%.*s'\n", (int)len,
108: argv[optind] + subs[0].rm_so);
109: else
110: printf("match `'@%.1s\n",
111: argv[optind] + subs[0].rm_so);
112: }
113: for (i = 1; i < NS; i++)
114: if (subs[i].rm_so != -1)
115: printf("(%d) `%.*s'\n", i,
116: (int)(subs[i].rm_eo - subs[i].rm_so),
117: argv[optind] + subs[i].rm_so);
118: }
119: exit(status);
120: }
121:
122: /*
123: - regress - main loop of regression test
124: == void regress(FILE *in);
125: */
126: void
127: regress(in)
128: FILE *in;
129: {
130: char inbuf[1000];
131: # define MAXF 10
132: char *f[MAXF];
133: int nf;
134: int i;
135: char erbuf[100];
136: size_t ne;
137: char *badpat = "invalid regular expression";
138: # define SHORT 10
139: char *bpname = "REG_BADPAT";
140: regex_t re;
141:
142: while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
143: line++;
144: if (inbuf[0] == '#' || inbuf[0] == '\n')
145: continue; /* NOTE CONTINUE */
146: inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
147: if (debug)
148: fprintf(stdout, "%d:\n", line);
149: nf = split(inbuf, f, MAXF, "\t\t");
150: if (nf < 3) {
151: fprintf(stderr, "bad input, line %d\n", line);
152: exit(1);
153: }
154: for (i = 0; i < nf; i++)
155: if (strcmp(f[i], "\"\"") == 0)
156: f[i] = "";
157: if (nf <= 3)
158: f[3] = NULL;
159: if (nf <= 4)
160: f[4] = NULL;
161: try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
162: if (opt('&', f[1])) /* try with either type of RE */
163: try(f[0], f[1], f[2], f[3], f[4],
164: options('c', f[1]) &~ REG_EXTENDED);
165: }
166:
167: ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
168: if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
169: fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
170: erbuf, badpat);
171: status = 1;
172: }
173: ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
174: if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
175: ne != strlen(badpat)+1) {
176: fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
177: erbuf, SHORT-1, badpat);
178: status = 1;
179: }
180: ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
181: if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
182: fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
183: erbuf, bpname);
184: status = 1;
185: }
186: re.re_endp = bpname;
187: ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
188: if (atoi(erbuf) != (int)REG_BADPAT) {
189: fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
190: erbuf, (long)REG_BADPAT);
191: status = 1;
192: } else if (ne != strlen(erbuf)+1) {
193: fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
194: erbuf, (long)REG_BADPAT);
195: status = 1;
196: }
197: }
198:
199: /*
200: - try - try it, and report on problems
201: == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
202: */
203: void
204: try(f0, f1, f2, f3, f4, opts)
205: char *f0;
206: char *f1;
207: char *f2;
208: char *f3;
209: char *f4;
210: int opts; /* may not match f1 */
211: {
212: regex_t re;
213: # define NSUBS 10
214: regmatch_t subs[NSUBS];
215: # define NSHOULD 15
216: char *should[NSHOULD];
217: int nshould;
218: char erbuf[100];
219: int err;
220: int len;
221: char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
222: register int i;
223: char *grump;
224: char f0copy[1000];
225: char f2copy[1000];
226:
227: strcpy(f0copy, f0);
228: re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
229: fixstr(f0copy);
230: err = regcomp(&re, f0copy, opts);
231: if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
232: /* unexpected error or wrong error */
233: len = regerror(err, &re, erbuf, sizeof(erbuf));
234: fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
235: line, type, eprint(err), len,
236: sizeof(erbuf), erbuf);
237: status = 1;
238: } else if (err == 0 && opt('C', f1)) {
239: /* unexpected success */
240: fprintf(stderr, "%d: %s should have given REG_%s\n",
241: line, type, f2);
242: status = 1;
243: err = 1; /* so we won't try regexec */
244: }
245:
246: if (err != 0) {
247: regfree(&re);
248: return;
249: }
250:
251: strcpy(f2copy, f2);
252: fixstr(f2copy);
253:
254: if (options('e', f1)®_STARTEND) {
255: if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
256: fprintf(stderr, "%d: bad STARTEND syntax\n", line);
257: subs[0].rm_so = strchr(f2, '(') - f2 + 1;
258: subs[0].rm_eo = strchr(f2, ')') - f2;
259: }
260: err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
261:
262: if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
263: /* unexpected error or wrong error */
264: len = regerror(err, &re, erbuf, sizeof(erbuf));
265: fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
266: line, type, eprint(err), len,
267: sizeof(erbuf), erbuf);
268: status = 1;
269: } else if (err != 0) {
270: /* nothing more to check */
271: } else if (f3 == NULL) {
272: /* unexpected success */
273: fprintf(stderr, "%d: %s exec should have failed\n",
274: line, type);
275: status = 1;
276: err = 1; /* just on principle */
277: } else if (opts®_NOSUB) {
278: /* nothing more to check */
279: } else if ((grump = check(f2, subs[0], f3)) != NULL) {
280: fprintf(stderr, "%d: %s %s\n", line, type, grump);
281: status = 1;
282: err = 1;
283: }
284:
285: if (err != 0 || f4 == NULL) {
286: regfree(&re);
287: return;
288: }
289:
290: for (i = 1; i < NSHOULD; i++)
291: should[i] = NULL;
292: nshould = split(f4, should+1, NSHOULD-1, ",");
293: if (nshould == 0) {
294: nshould = 1;
295: should[1] = "";
296: }
297: for (i = 1; i < NSUBS; i++) {
298: grump = check(f2, subs[i], should[i]);
299: if (grump != NULL) {
300: fprintf(stderr, "%d: %s $%d %s\n", line,
301: type, i, grump);
302: status = 1;
303: err = 1;
304: }
305: }
306:
307: regfree(&re);
308: }
309:
310: /*
311: - options - pick options out of a regression-test string
312: == int options(int type, char *s);
313: */
314: int
315: options(type, s)
316: int type; /* 'c' compile, 'e' exec */
317: char *s;
318: {
319: register char *p;
320: register int o = (type == 'c') ? copts : eopts;
321: register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
322:
323: for (p = s; *p != '\0'; p++)
324: if (strchr(legal, *p) != NULL)
325: switch (*p) {
326: case 'b':
327: o &= ~REG_EXTENDED;
328: break;
329: case 'i':
330: o |= REG_ICASE;
331: break;
332: case 's':
333: o |= REG_NOSUB;
334: break;
335: case 'n':
336: o |= REG_NEWLINE;
337: break;
338: case 'm':
339: o &= ~REG_EXTENDED;
340: o |= REG_NOSPEC;
341: break;
342: case 'p':
343: o |= REG_PEND;
344: break;
345: case '^':
346: o |= REG_NOTBOL;
347: break;
348: case '$':
349: o |= REG_NOTEOL;
350: break;
351: case '#':
352: o |= REG_STARTEND;
353: break;
354: case 't': /* trace */
355: o |= REG_TRACE;
356: break;
357: case 'l': /* force long representation */
358: o |= REG_LARGE;
359: break;
360: case 'r': /* force backref use */
361: o |= REG_BACKR;
362: break;
363: }
364: return(o);
365: }
366:
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns nonzero if character c occurs in the string s, zero otherwise.
 */
int				/* predicate */
opt(int c, char *s)
{
	return(strchr(s, c) != NULL);
}
378:
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' NUL.  The scan stops at the first NUL that was present in the
 * original text; a NUL produced from 'Z' does not end it.  A NULL
 * pointer is accepted and ignored.
 */
void
fixstr(char *p)
{
	if (p == NULL)
		return;

	for (; *p != '\0'; p++) {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
	}
}
400:
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match offsets in sub, taken against str, with the
 * expectation in should:
 *	NULL or "-"	the substring should not have matched
 *	"@text"		an empty match located just before `text'
 *			("@" alone means at the end of the string)
 *	anything else	the exact text that should have matched
 * Returns NULL if all is well, otherwise a complaint.  The complaint
 * may live in a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
	    (sub.rm_so != -1 && sub.rm_eo == -1) ||
	    (sub.rm_so != -1 && sub.rm_so < 0) ||
	    (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		snprintf(grump, sizeof(grump), "start %ld end %ld",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		snprintf(grump, sizeof(grump),
		    "start %ld end %ld, past end of string",
		    (long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* check for not supposed to match */
	if (should == NULL) {
		snprintf(grump, sizeof(grump), "matched `%.*s'", len, p);
		return(grump);
	}

	/* should is non-NULL from here on, so its length may be taken */
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched `%.*s' instead", len, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		snprintf(grump, sizeof(grump), "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
477:
478: /*
479: - eprint - convert error number to name
480: == static char *eprint(int err);
481: */
482: static char *
483: eprint(err)
484: int err;
485: {
486: static char epbuf[100];
487: size_t len;
488:
489: len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
490: assert(len <= sizeof(epbuf));
491: return(epbuf);
492: }
493:
494: /*
495: - efind - convert error name to number
496: == static int efind(char *name);
497: */
498: static int
499: efind(name)
500: char *name;
501: {
502: static char efbuf[100];
503: regex_t re;
504:
505: sprintf(efbuf, "REG_%s", name);
506: assert(strlen(efbuf) < sizeof(efbuf));
507: re.re_endp = efbuf;
508: (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509: return(atoi(efbuf));
510: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>