/* embedaddon/php/ext/ereg/regex/main.c, revision 1.1 (text recovered from a CVSweb annotation view) */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <regex.h>
#include <assert.h>
#include <stdlib.h>

#include "main.ih"

/* Program name; main() sets it from argv[0] and uses it in the usage message. */
char *progname;

/* Run state shared by the functions in this file. */
int debug = 0;			/* incremented once per -x on the command line */
int line = 0;			/* input line counter kept by regress(), shown in diagnostics */
int status = 0;			/* value passed to exit(); set to 1 when a check fails */

/* Base flag sets that options() starts from. */
int copts = REG_EXTENDED;	/* compile flags; replaced by -c */
int eopts = 0;			/* execute flags; replaced by -e */

/* REG_STARTEND bounds used by main(): start offset and distance back from the end. */
regoff_t startoff = 0;		/* set by -S */
regoff_t endoff = 0;		/* set by -E */


/* Helpers defined outside this file (old-style declarations, no prototype). */
extern int split();
extern void regprint();

! 24: /*
! 25: - main - do the simple case, hand off to regress() for regression
! 26: */
! 27: int main(argc, argv)
! 28: int argc;
! 29: char *argv[];
! 30: {
! 31: regex_t re;
! 32: # define NS 10
! 33: regmatch_t subs[NS];
! 34: char erbuf[100];
! 35: int err;
! 36: size_t len;
! 37: int c;
! 38: int errflg = 0;
! 39: register int i;
! 40: extern int optind;
! 41: extern char *optarg;
! 42:
! 43: progname = argv[0];
! 44:
! 45: while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
! 46: switch (c) {
! 47: case 'c': /* compile options */
! 48: copts = options('c', optarg);
! 49: break;
! 50: case 'e': /* execute options */
! 51: eopts = options('e', optarg);
! 52: break;
! 53: case 'S': /* start offset */
! 54: startoff = (regoff_t)atoi(optarg);
! 55: break;
! 56: case 'E': /* end offset */
! 57: endoff = (regoff_t)atoi(optarg);
! 58: break;
! 59: case 'x': /* Debugging. */
! 60: debug++;
! 61: break;
! 62: case '?':
! 63: default:
! 64: errflg++;
! 65: break;
! 66: }
! 67: if (errflg) {
! 68: fprintf(stderr, "usage: %s ", progname);
! 69: fprintf(stderr, "[-c copt][-C][-d] [re]\n");
! 70: exit(2);
! 71: }
! 72:
! 73: if (optind >= argc) {
! 74: regress(stdin);
! 75: exit(status);
! 76: }
! 77:
! 78: err = regcomp(&re, argv[optind++], copts);
! 79: if (err) {
! 80: len = regerror(err, &re, erbuf, sizeof(erbuf));
! 81: fprintf(stderr, "error %s, %d/%d `%s'\n",
! 82: eprint(err), len, sizeof(erbuf), erbuf);
! 83: exit(status);
! 84: }
! 85: regprint(&re, stdout);
! 86:
! 87: if (optind >= argc) {
! 88: regfree(&re);
! 89: exit(status);
! 90: }
! 91:
! 92: if (eopts®_STARTEND) {
! 93: subs[0].rm_so = startoff;
! 94: subs[0].rm_eo = strlen(argv[optind]) - endoff;
! 95: }
! 96: err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
! 97: if (err) {
! 98: len = regerror(err, &re, erbuf, sizeof(erbuf));
! 99: fprintf(stderr, "error %s, %d/%d `%s'\n",
! 100: eprint(err), len, sizeof(erbuf), erbuf);
! 101: exit(status);
! 102: }
! 103: if (!(copts®_NOSUB)) {
! 104: len = (int)(subs[0].rm_eo - subs[0].rm_so);
! 105: if (subs[0].rm_so != -1) {
! 106: if (len != 0)
! 107: printf("match `%.*s'\n", (int)len,
! 108: argv[optind] + subs[0].rm_so);
! 109: else
! 110: printf("match `'@%.1s\n",
! 111: argv[optind] + subs[0].rm_so);
! 112: }
! 113: for (i = 1; i < NS; i++)
! 114: if (subs[i].rm_so != -1)
! 115: printf("(%d) `%.*s'\n", i,
! 116: (int)(subs[i].rm_eo - subs[i].rm_so),
! 117: argv[optind] + subs[i].rm_so);
! 118: }
! 119: exit(status);
! 120: }
! 121:
! 122: /*
! 123: - regress - main loop of regression test
! 124: == void regress(FILE *in);
! 125: */
! 126: void
! 127: regress(in)
! 128: FILE *in;
! 129: {
! 130: char inbuf[1000];
! 131: # define MAXF 10
! 132: char *f[MAXF];
! 133: int nf;
! 134: int i;
! 135: char erbuf[100];
! 136: size_t ne;
! 137: char *badpat = "invalid regular expression";
! 138: # define SHORT 10
! 139: char *bpname = "REG_BADPAT";
! 140: regex_t re;
! 141:
! 142: while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
! 143: line++;
! 144: if (inbuf[0] == '#' || inbuf[0] == '\n')
! 145: continue; /* NOTE CONTINUE */
! 146: inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
! 147: if (debug)
! 148: fprintf(stdout, "%d:\n", line);
! 149: nf = split(inbuf, f, MAXF, "\t\t");
! 150: if (nf < 3) {
! 151: fprintf(stderr, "bad input, line %d\n", line);
! 152: exit(1);
! 153: }
! 154: for (i = 0; i < nf; i++)
! 155: if (strcmp(f[i], "\"\"") == 0)
! 156: f[i] = "";
! 157: if (nf <= 3)
! 158: f[3] = NULL;
! 159: if (nf <= 4)
! 160: f[4] = NULL;
! 161: try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
! 162: if (opt('&', f[1])) /* try with either type of RE */
! 163: try(f[0], f[1], f[2], f[3], f[4],
! 164: options('c', f[1]) &~ REG_EXTENDED);
! 165: }
! 166:
! 167: ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
! 168: if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
! 169: fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
! 170: erbuf, badpat);
! 171: status = 1;
! 172: }
! 173: ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
! 174: if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
! 175: ne != strlen(badpat)+1) {
! 176: fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
! 177: erbuf, SHORT-1, badpat);
! 178: status = 1;
! 179: }
! 180: ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
! 181: if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
! 182: fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
! 183: erbuf, bpname);
! 184: status = 1;
! 185: }
! 186: re.re_endp = bpname;
! 187: ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
! 188: if (atoi(erbuf) != (int)REG_BADPAT) {
! 189: fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
! 190: erbuf, (long)REG_BADPAT);
! 191: status = 1;
! 192: } else if (ne != strlen(erbuf)+1) {
! 193: fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
! 194: erbuf, (long)REG_BADPAT);
! 195: status = 1;
! 196: }
! 197: }
! 198:
! 199: /*
! 200: - try - try it, and report on problems
! 201: == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
! 202: */
! 203: void
! 204: try(f0, f1, f2, f3, f4, opts)
! 205: char *f0;
! 206: char *f1;
! 207: char *f2;
! 208: char *f3;
! 209: char *f4;
! 210: int opts; /* may not match f1 */
! 211: {
! 212: regex_t re;
! 213: # define NSUBS 10
! 214: regmatch_t subs[NSUBS];
! 215: # define NSHOULD 15
! 216: char *should[NSHOULD];
! 217: int nshould;
! 218: char erbuf[100];
! 219: int err;
! 220: int len;
! 221: char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
! 222: register int i;
! 223: char *grump;
! 224: char f0copy[1000];
! 225: char f2copy[1000];
! 226:
! 227: strcpy(f0copy, f0);
! 228: re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL;
! 229: fixstr(f0copy);
! 230: err = regcomp(&re, f0copy, opts);
! 231: if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
! 232: /* unexpected error or wrong error */
! 233: len = regerror(err, &re, erbuf, sizeof(erbuf));
! 234: fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
! 235: line, type, eprint(err), len,
! 236: sizeof(erbuf), erbuf);
! 237: status = 1;
! 238: } else if (err == 0 && opt('C', f1)) {
! 239: /* unexpected success */
! 240: fprintf(stderr, "%d: %s should have given REG_%s\n",
! 241: line, type, f2);
! 242: status = 1;
! 243: err = 1; /* so we won't try regexec */
! 244: }
! 245:
! 246: if (err != 0) {
! 247: regfree(&re);
! 248: return;
! 249: }
! 250:
! 251: strcpy(f2copy, f2);
! 252: fixstr(f2copy);
! 253:
! 254: if (options('e', f1)®_STARTEND) {
! 255: if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
! 256: fprintf(stderr, "%d: bad STARTEND syntax\n", line);
! 257: subs[0].rm_so = strchr(f2, '(') - f2 + 1;
! 258: subs[0].rm_eo = strchr(f2, ')') - f2;
! 259: }
! 260: err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
! 261:
! 262: if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
! 263: /* unexpected error or wrong error */
! 264: len = regerror(err, &re, erbuf, sizeof(erbuf));
! 265: fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
! 266: line, type, eprint(err), len,
! 267: sizeof(erbuf), erbuf);
! 268: status = 1;
! 269: } else if (err != 0) {
! 270: /* nothing more to check */
! 271: } else if (f3 == NULL) {
! 272: /* unexpected success */
! 273: fprintf(stderr, "%d: %s exec should have failed\n",
! 274: line, type);
! 275: status = 1;
! 276: err = 1; /* just on principle */
! 277: } else if (opts®_NOSUB) {
! 278: /* nothing more to check */
! 279: } else if ((grump = check(f2, subs[0], f3)) != NULL) {
! 280: fprintf(stderr, "%d: %s %s\n", line, type, grump);
! 281: status = 1;
! 282: err = 1;
! 283: }
! 284:
! 285: if (err != 0 || f4 == NULL) {
! 286: regfree(&re);
! 287: return;
! 288: }
! 289:
! 290: for (i = 1; i < NSHOULD; i++)
! 291: should[i] = NULL;
! 292: nshould = split(f4, should+1, NSHOULD-1, ",");
! 293: if (nshould == 0) {
! 294: nshould = 1;
! 295: should[1] = "";
! 296: }
! 297: for (i = 1; i < NSUBS; i++) {
! 298: grump = check(f2, subs[i], should[i]);
! 299: if (grump != NULL) {
! 300: fprintf(stderr, "%d: %s $%d %s\n", line,
! 301: type, i, grump);
! 302: status = 1;
! 303: err = 1;
! 304: }
! 305: }
! 306:
! 307: regfree(&re);
! 308: }
! 309:
! 310: /*
! 311: - options - pick options out of a regression-test string
! 312: == int options(int type, char *s);
! 313: */
! 314: int
! 315: options(type, s)
! 316: int type; /* 'c' compile, 'e' exec */
! 317: char *s;
! 318: {
! 319: register char *p;
! 320: register int o = (type == 'c') ? copts : eopts;
! 321: register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
! 322:
! 323: for (p = s; *p != '\0'; p++)
! 324: if (strchr(legal, *p) != NULL)
! 325: switch (*p) {
! 326: case 'b':
! 327: o &= ~REG_EXTENDED;
! 328: break;
! 329: case 'i':
! 330: o |= REG_ICASE;
! 331: break;
! 332: case 's':
! 333: o |= REG_NOSUB;
! 334: break;
! 335: case 'n':
! 336: o |= REG_NEWLINE;
! 337: break;
! 338: case 'm':
! 339: o &= ~REG_EXTENDED;
! 340: o |= REG_NOSPEC;
! 341: break;
! 342: case 'p':
! 343: o |= REG_PEND;
! 344: break;
! 345: case '^':
! 346: o |= REG_NOTBOL;
! 347: break;
! 348: case '$':
! 349: o |= REG_NOTEOL;
! 350: break;
! 351: case '#':
! 352: o |= REG_STARTEND;
! 353: break;
! 354: case 't': /* trace */
! 355: o |= REG_TRACE;
! 356: break;
! 357: case 'l': /* force long representation */
! 358: o |= REG_LARGE;
! 359: break;
! 360: case 'r': /* force backref use */
! 361: o |= REG_BACKR;
! 362: break;
! 363: }
! 364: return(o);
! 365: }
! 366:
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 */
/* Returns 1 if the character `c' occurs in the string `s', 0 otherwise. */
int				/* predicate */
opt(int c, char *s)
{
	if (strchr(s, c) == NULL)
		return(0);
	return(1);
}

/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 */
/*
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes a space and 'Z' becomes a NUL byte.  The scan runs up to
 * the string's original terminator, so text following a 'Z' is still
 * translated.  A NULL pointer is accepted and ignored.
 */
void
fixstr(char *p)
{
	char *cur;

	if (p == NULL)
		return;

	for (cur = p; *cur != '\0'; cur++) {
		switch (*cur) {
		case 'N':
			*cur = '\n';
			break;
		case 'T':
			*cur = '\t';
			break;
		case 'S':
			*cur = ' ';
			break;
		case 'Z':
			*cur = '\0';
			break;
		default:
			break;
		}
	}
}

/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 */
/*
 * Compares one reported match against the expectation.
 *
 *	str	the subject string the offsets in `sub' refer to
 *	sub	the match as reported by regexec()
 *	should	the expected matched text; NULL or "-" means "no match
 *		expected"; "@text" means "an empty match located just
 *		before text" (a bare "@" means at the end of the string)
 *
 * Returns NULL when the match is as expected, otherwise a complaint.
 * The complaint is either a string literal or a static buffer that the
 * next call overwrites.
 */
char *				/* NULL or complaint */
check(char *str, regmatch_t sub, char *should)
{
	int len;
	int shlen;
	int show;
	char *p;
	static char grump[500];
	char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range (rm_eo is known to be non-negative here) */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;

	/* limit how much matched text is echoed so grump cannot overflow */
	show = len;
	if (show > (int)sizeof(grump) - 40)
		show = (int)sizeof(grump) - 40;

	/*
	 * check for not supposed to match; this has to come before
	 * strlen(should), which must never see a NULL pointer
	 */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", show, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", show, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}

! 478: /*
! 479: - eprint - convert error number to name
! 480: == static char *eprint(int err);
! 481: */
! 482: static char *
! 483: eprint(err)
! 484: int err;
! 485: {
! 486: static char epbuf[100];
! 487: size_t len;
! 488:
! 489: len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
! 490: assert(len <= sizeof(epbuf));
! 491: return(epbuf);
! 492: }
! 493:
! 494: /*
! 495: - efind - convert error name to number
! 496: == static int efind(char *name);
! 497: */
! 498: static int
! 499: efind(name)
! 500: char *name;
! 501: {
! 502: static char efbuf[100];
! 503: regex_t re;
! 504:
! 505: sprintf(efbuf, "REG_%s", name);
! 506: assert(strlen(efbuf) < sizeof(efbuf));
! 507: re.re_endp = efbuf;
! 508: (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
! 509: return(atoi(efbuf));
! 510: }
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */