Annotation of embedaddon/libxml2/xmlregexp.c, revision 1.1
1.1 ! misho 1: /*
! 2: * regexp.c: generic and extensible Regular Expression engine
! 3: *
! 4: * Basically designed with the purpose of compiling regexps for
! 5: * the variety of validation/shemas mechanisms now available in
! 6: * XML related specifications these include:
! 7: * - XML-1.0 DTD validation
! 8: * - XML Schemas structure part 1
! 9: * - XML Schemas Datatypes part 2 especially Appendix F
! 10: * - RELAX-NG/TREX i.e. the counter proposal
! 11: *
! 12: * See Copyright for the status of this software.
! 13: *
! 14: * Daniel Veillard <veillard@redhat.com>
! 15: */
! 16:
! 17: #define IN_LIBXML
! 18: #include "libxml.h"
! 19:
! 20: #ifdef LIBXML_REGEXP_ENABLED
! 21:
! 22: /* #define DEBUG_ERR */
! 23:
! 24: #include <stdio.h>
! 25: #include <string.h>
! 26: #ifdef HAVE_LIMITS_H
! 27: #include <limits.h>
! 28: #endif
! 29:
! 30: #include <libxml/tree.h>
! 31: #include <libxml/parserInternals.h>
! 32: #include <libxml/xmlregexp.h>
! 33: #include <libxml/xmlautomata.h>
! 34: #include <libxml/xmlunicode.h>
! 35:
! 36: #ifndef INT_MAX
! 37: #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
! 38: #endif
! 39:
! 40: /* #define DEBUG_REGEXP_GRAPH */
! 41: /* #define DEBUG_REGEXP_EXEC */
! 42: /* #define DEBUG_PUSH */
! 43: /* #define DEBUG_COMPACTION */
! 44:
! 45: #define MAX_PUSH 10000000
! 46:
! 47: #define ERROR(str) \
! 48: ctxt->error = XML_REGEXP_COMPILE_ERROR; \
! 49: xmlRegexpErrCompile(ctxt, str);
! 50: #define NEXT ctxt->cur++
! 51: #define CUR (*(ctxt->cur))
! 52: #define NXT(index) (ctxt->cur[index])
! 53:
! 54: #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
! 55: #define NEXTL(l) ctxt->cur += l;
! 56: #define XML_REG_STRING_SEPARATOR '|'
! 57: /*
! 58: * Need PREV to check on a '-' within a Character Group. May only be used
! 59: * when it's guaranteed that cur is not at the beginning of ctxt->string!
! 60: */
! 61: #define PREV (ctxt->cur[-1])
! 62:
! 63: /**
! 64: * TODO:
! 65: *
! 66: * macro to flag unimplemented blocks
! 67: */
! 68: #define TODO \
! 69: xmlGenericError(xmlGenericErrorContext, \
! 70: "Unimplemented block at %s:%d\n", \
! 71: __FILE__, __LINE__);
! 72:
! 73: /************************************************************************
! 74: * *
! 75: * Datatypes and structures *
! 76: * *
! 77: ************************************************************************/
! 78:
! 79: /*
! 80: * Note: the order of the enums below is significant, do not shuffle
! 81: */
! 82: typedef enum {
! 83: XML_REGEXP_EPSILON = 1,
! 84: XML_REGEXP_CHARVAL,
! 85: XML_REGEXP_RANGES,
! 86: XML_REGEXP_SUBREG, /* used for () sub regexps */
! 87: XML_REGEXP_STRING,
! 88: XML_REGEXP_ANYCHAR, /* . */
! 89: XML_REGEXP_ANYSPACE, /* \s */
! 90: XML_REGEXP_NOTSPACE, /* \S */
! 91: XML_REGEXP_INITNAME, /* \l */
! 92: XML_REGEXP_NOTINITNAME, /* \L */
! 93: XML_REGEXP_NAMECHAR, /* \c */
! 94: XML_REGEXP_NOTNAMECHAR, /* \C */
! 95: XML_REGEXP_DECIMAL, /* \d */
! 96: XML_REGEXP_NOTDECIMAL, /* \D */
! 97: XML_REGEXP_REALCHAR, /* \w */
! 98: XML_REGEXP_NOTREALCHAR, /* \W */
! 99: XML_REGEXP_LETTER = 100,
! 100: XML_REGEXP_LETTER_UPPERCASE,
! 101: XML_REGEXP_LETTER_LOWERCASE,
! 102: XML_REGEXP_LETTER_TITLECASE,
! 103: XML_REGEXP_LETTER_MODIFIER,
! 104: XML_REGEXP_LETTER_OTHERS,
! 105: XML_REGEXP_MARK,
! 106: XML_REGEXP_MARK_NONSPACING,
! 107: XML_REGEXP_MARK_SPACECOMBINING,
! 108: XML_REGEXP_MARK_ENCLOSING,
! 109: XML_REGEXP_NUMBER,
! 110: XML_REGEXP_NUMBER_DECIMAL,
! 111: XML_REGEXP_NUMBER_LETTER,
! 112: XML_REGEXP_NUMBER_OTHERS,
! 113: XML_REGEXP_PUNCT,
! 114: XML_REGEXP_PUNCT_CONNECTOR,
! 115: XML_REGEXP_PUNCT_DASH,
! 116: XML_REGEXP_PUNCT_OPEN,
! 117: XML_REGEXP_PUNCT_CLOSE,
! 118: XML_REGEXP_PUNCT_INITQUOTE,
! 119: XML_REGEXP_PUNCT_FINQUOTE,
! 120: XML_REGEXP_PUNCT_OTHERS,
! 121: XML_REGEXP_SEPAR,
! 122: XML_REGEXP_SEPAR_SPACE,
! 123: XML_REGEXP_SEPAR_LINE,
! 124: XML_REGEXP_SEPAR_PARA,
! 125: XML_REGEXP_SYMBOL,
! 126: XML_REGEXP_SYMBOL_MATH,
! 127: XML_REGEXP_SYMBOL_CURRENCY,
! 128: XML_REGEXP_SYMBOL_MODIFIER,
! 129: XML_REGEXP_SYMBOL_OTHERS,
! 130: XML_REGEXP_OTHER,
! 131: XML_REGEXP_OTHER_CONTROL,
! 132: XML_REGEXP_OTHER_FORMAT,
! 133: XML_REGEXP_OTHER_PRIVATE,
! 134: XML_REGEXP_OTHER_NA,
! 135: XML_REGEXP_BLOCK_NAME
! 136: } xmlRegAtomType;
! 137:
! 138: typedef enum {
! 139: XML_REGEXP_QUANT_EPSILON = 1,
! 140: XML_REGEXP_QUANT_ONCE,
! 141: XML_REGEXP_QUANT_OPT,
! 142: XML_REGEXP_QUANT_MULT,
! 143: XML_REGEXP_QUANT_PLUS,
! 144: XML_REGEXP_QUANT_ONCEONLY,
! 145: XML_REGEXP_QUANT_ALL,
! 146: XML_REGEXP_QUANT_RANGE
! 147: } xmlRegQuantType;
! 148:
! 149: typedef enum {
! 150: XML_REGEXP_START_STATE = 1,
! 151: XML_REGEXP_FINAL_STATE,
! 152: XML_REGEXP_TRANS_STATE,
! 153: XML_REGEXP_SINK_STATE,
! 154: XML_REGEXP_UNREACH_STATE
! 155: } xmlRegStateType;
! 156:
! 157: typedef enum {
! 158: XML_REGEXP_MARK_NORMAL = 0,
! 159: XML_REGEXP_MARK_START,
! 160: XML_REGEXP_MARK_VISITED
! 161: } xmlRegMarkedType;
! 162:
! 163: typedef struct _xmlRegRange xmlRegRange;
! 164: typedef xmlRegRange *xmlRegRangePtr;
! 165:
! 166: struct _xmlRegRange {
! 167: int neg; /* 0 normal, 1 not, 2 exclude */
! 168: xmlRegAtomType type;
! 169: int start;
! 170: int end;
! 171: xmlChar *blockName;
! 172: };
! 173:
! 174: typedef struct _xmlRegAtom xmlRegAtom;
! 175: typedef xmlRegAtom *xmlRegAtomPtr;
! 176:
! 177: typedef struct _xmlAutomataState xmlRegState;
! 178: typedef xmlRegState *xmlRegStatePtr;
! 179:
! 180: struct _xmlRegAtom {
! 181: int no;
! 182: xmlRegAtomType type;
! 183: xmlRegQuantType quant;
! 184: int min;
! 185: int max;
! 186:
! 187: void *valuep;
! 188: void *valuep2;
! 189: int neg;
! 190: int codepoint;
! 191: xmlRegStatePtr start;
! 192: xmlRegStatePtr start0;
! 193: xmlRegStatePtr stop;
! 194: int maxRanges;
! 195: int nbRanges;
! 196: xmlRegRangePtr *ranges;
! 197: void *data;
! 198: };
! 199:
! 200: typedef struct _xmlRegCounter xmlRegCounter;
! 201: typedef xmlRegCounter *xmlRegCounterPtr;
! 202:
! 203: struct _xmlRegCounter {
! 204: int min;
! 205: int max;
! 206: };
! 207:
! 208: typedef struct _xmlRegTrans xmlRegTrans;
! 209: typedef xmlRegTrans *xmlRegTransPtr;
! 210:
! 211: struct _xmlRegTrans {
! 212: xmlRegAtomPtr atom;
! 213: int to;
! 214: int counter;
! 215: int count;
! 216: int nd;
! 217: };
! 218:
! 219: struct _xmlAutomataState {
! 220: xmlRegStateType type;
! 221: xmlRegMarkedType mark;
! 222: xmlRegMarkedType reached;
! 223: int no;
! 224: int maxTrans;
! 225: int nbTrans;
! 226: xmlRegTrans *trans;
! 227: /* knowing states ponting to us can speed things up */
! 228: int maxTransTo;
! 229: int nbTransTo;
! 230: int *transTo;
! 231: };
! 232:
! 233: typedef struct _xmlAutomata xmlRegParserCtxt;
! 234: typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
! 235:
! 236: #define AM_AUTOMATA_RNG 1
! 237:
! 238: struct _xmlAutomata {
! 239: xmlChar *string;
! 240: xmlChar *cur;
! 241:
! 242: int error;
! 243: int neg;
! 244:
! 245: xmlRegStatePtr start;
! 246: xmlRegStatePtr end;
! 247: xmlRegStatePtr state;
! 248:
! 249: xmlRegAtomPtr atom;
! 250:
! 251: int maxAtoms;
! 252: int nbAtoms;
! 253: xmlRegAtomPtr *atoms;
! 254:
! 255: int maxStates;
! 256: int nbStates;
! 257: xmlRegStatePtr *states;
! 258:
! 259: int maxCounters;
! 260: int nbCounters;
! 261: xmlRegCounter *counters;
! 262:
! 263: int determinist;
! 264: int negs;
! 265: int flags;
! 266: };
! 267:
! 268: struct _xmlRegexp {
! 269: xmlChar *string;
! 270: int nbStates;
! 271: xmlRegStatePtr *states;
! 272: int nbAtoms;
! 273: xmlRegAtomPtr *atoms;
! 274: int nbCounters;
! 275: xmlRegCounter *counters;
! 276: int determinist;
! 277: int flags;
! 278: /*
! 279: * That's the compact form for determinists automatas
! 280: */
! 281: int nbstates;
! 282: int *compact;
! 283: void **transdata;
! 284: int nbstrings;
! 285: xmlChar **stringMap;
! 286: };
! 287:
! 288: typedef struct _xmlRegExecRollback xmlRegExecRollback;
! 289: typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
! 290:
! 291: struct _xmlRegExecRollback {
! 292: xmlRegStatePtr state;/* the current state */
! 293: int index; /* the index in the input stack */
! 294: int nextbranch; /* the next transition to explore in that state */
! 295: int *counts; /* save the automata state if it has some */
! 296: };
! 297:
! 298: typedef struct _xmlRegInputToken xmlRegInputToken;
! 299: typedef xmlRegInputToken *xmlRegInputTokenPtr;
! 300:
! 301: struct _xmlRegInputToken {
! 302: xmlChar *value;
! 303: void *data;
! 304: };
! 305:
! 306: struct _xmlRegExecCtxt {
! 307: int status; /* execution status != 0 indicate an error */
! 308: int determinist; /* did we find an indeterministic behaviour */
! 309: xmlRegexpPtr comp; /* the compiled regexp */
! 310: xmlRegExecCallbacks callback;
! 311: void *data;
! 312:
! 313: xmlRegStatePtr state;/* the current state */
! 314: int transno; /* the current transition on that state */
! 315: int transcount; /* the number of chars in char counted transitions */
! 316:
! 317: /*
! 318: * A stack of rollback states
! 319: */
! 320: int maxRollbacks;
! 321: int nbRollbacks;
! 322: xmlRegExecRollback *rollbacks;
! 323:
! 324: /*
! 325: * The state of the automata if any
! 326: */
! 327: int *counts;
! 328:
! 329: /*
! 330: * The input stack
! 331: */
! 332: int inputStackMax;
! 333: int inputStackNr;
! 334: int index;
! 335: int *charStack;
! 336: const xmlChar *inputString; /* when operating on characters */
! 337: xmlRegInputTokenPtr inputStack;/* when operating on strings */
! 338:
! 339: /*
! 340: * error handling
! 341: */
! 342: int errStateNo; /* the error state number */
! 343: xmlRegStatePtr errState; /* the error state */
! 344: xmlChar *errString; /* the string raising the error */
! 345: int *errCounts; /* counters at the error state */
! 346: int nbPush;
! 347: };
! 348:
! 349: #define REGEXP_ALL_COUNTER 0x123456
! 350: #define REGEXP_ALL_LAX_COUNTER 0x123457
! 351:
! 352: static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
! 353: static void xmlRegFreeState(xmlRegStatePtr state);
! 354: static void xmlRegFreeAtom(xmlRegAtomPtr atom);
! 355: static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
! 356: static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
! 357: static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
! 358: int neg, int start, int end, const xmlChar *blockName);
! 359:
! 360: void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
! 361:
! 362: /************************************************************************
! 363: * *
! 364: * Regexp memory error handler *
! 365: * *
! 366: ************************************************************************/
! 367: /**
! 368: * xmlRegexpErrMemory:
! 369: * @extra: extra information
! 370: *
! 371: * Handle an out of memory condition
! 372: */
! 373: static void
! 374: xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
! 375: {
! 376: const char *regexp = NULL;
! 377: if (ctxt != NULL) {
! 378: regexp = (const char *) ctxt->string;
! 379: ctxt->error = XML_ERR_NO_MEMORY;
! 380: }
! 381: __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
! 382: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
! 383: regexp, NULL, 0, 0,
! 384: "Memory allocation failed : %s\n", extra);
! 385: }
! 386:
! 387: /**
! 388: * xmlRegexpErrCompile:
! 389: * @extra: extra information
! 390: *
! 391: * Handle a compilation failure
! 392: */
! 393: static void
! 394: xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
! 395: {
! 396: const char *regexp = NULL;
! 397: int idx = 0;
! 398:
! 399: if (ctxt != NULL) {
! 400: regexp = (const char *) ctxt->string;
! 401: idx = ctxt->cur - ctxt->string;
! 402: ctxt->error = XML_REGEXP_COMPILE_ERROR;
! 403: }
! 404: __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
! 405: XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
! 406: regexp, NULL, idx, 0,
! 407: "failed to compile: %s\n", extra);
! 408: }
! 409:
! 410: /************************************************************************
! 411: * *
! 412: * Allocation/Deallocation *
! 413: * *
! 414: ************************************************************************/
! 415:
! 416: static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
! 417: /**
! 418: * xmlRegEpxFromParse:
! 419: * @ctxt: the parser context used to build it
! 420: *
! 421: * Allocate a new regexp and fill it with the result from the parser
! 422: *
! 423: * Returns the new regexp or NULL in case of error
! 424: */
! 425: static xmlRegexpPtr
! 426: xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
! 427: xmlRegexpPtr ret;
! 428:
! 429: ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
! 430: if (ret == NULL) {
! 431: xmlRegexpErrMemory(ctxt, "compiling regexp");
! 432: return(NULL);
! 433: }
! 434: memset(ret, 0, sizeof(xmlRegexp));
! 435: ret->string = ctxt->string;
! 436: ret->nbStates = ctxt->nbStates;
! 437: ret->states = ctxt->states;
! 438: ret->nbAtoms = ctxt->nbAtoms;
! 439: ret->atoms = ctxt->atoms;
! 440: ret->nbCounters = ctxt->nbCounters;
! 441: ret->counters = ctxt->counters;
! 442: ret->determinist = ctxt->determinist;
! 443: ret->flags = ctxt->flags;
! 444: if (ret->determinist == -1) {
! 445: xmlRegexpIsDeterminist(ret);
! 446: }
! 447:
! 448: if ((ret->determinist != 0) &&
! 449: (ret->nbCounters == 0) &&
! 450: (ctxt->negs == 0) &&
! 451: (ret->atoms != NULL) &&
! 452: (ret->atoms[0] != NULL) &&
! 453: (ret->atoms[0]->type == XML_REGEXP_STRING)) {
! 454: int i, j, nbstates = 0, nbatoms = 0;
! 455: int *stateRemap;
! 456: int *stringRemap;
! 457: int *transitions;
! 458: void **transdata;
! 459: xmlChar **stringMap;
! 460: xmlChar *value;
! 461:
! 462: /*
! 463: * Switch to a compact representation
! 464: * 1/ counting the effective number of states left
! 465: * 2/ counting the unique number of atoms, and check that
! 466: * they are all of the string type
! 467: * 3/ build a table state x atom for the transitions
! 468: */
! 469:
! 470: stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
! 471: if (stateRemap == NULL) {
! 472: xmlRegexpErrMemory(ctxt, "compiling regexp");
! 473: xmlFree(ret);
! 474: return(NULL);
! 475: }
! 476: for (i = 0;i < ret->nbStates;i++) {
! 477: if (ret->states[i] != NULL) {
! 478: stateRemap[i] = nbstates;
! 479: nbstates++;
! 480: } else {
! 481: stateRemap[i] = -1;
! 482: }
! 483: }
! 484: #ifdef DEBUG_COMPACTION
! 485: printf("Final: %d states\n", nbstates);
! 486: #endif
! 487: stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
! 488: if (stringMap == NULL) {
! 489: xmlRegexpErrMemory(ctxt, "compiling regexp");
! 490: xmlFree(stateRemap);
! 491: xmlFree(ret);
! 492: return(NULL);
! 493: }
! 494: stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
! 495: if (stringRemap == NULL) {
! 496: xmlRegexpErrMemory(ctxt, "compiling regexp");
! 497: xmlFree(stringMap);
! 498: xmlFree(stateRemap);
! 499: xmlFree(ret);
! 500: return(NULL);
! 501: }
! 502: for (i = 0;i < ret->nbAtoms;i++) {
! 503: if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
! 504: (ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
! 505: value = ret->atoms[i]->valuep;
! 506: for (j = 0;j < nbatoms;j++) {
! 507: if (xmlStrEqual(stringMap[j], value)) {
! 508: stringRemap[i] = j;
! 509: break;
! 510: }
! 511: }
! 512: if (j >= nbatoms) {
! 513: stringRemap[i] = nbatoms;
! 514: stringMap[nbatoms] = xmlStrdup(value);
! 515: if (stringMap[nbatoms] == NULL) {
! 516: for (i = 0;i < nbatoms;i++)
! 517: xmlFree(stringMap[i]);
! 518: xmlFree(stringRemap);
! 519: xmlFree(stringMap);
! 520: xmlFree(stateRemap);
! 521: xmlFree(ret);
! 522: return(NULL);
! 523: }
! 524: nbatoms++;
! 525: }
! 526: } else {
! 527: xmlFree(stateRemap);
! 528: xmlFree(stringRemap);
! 529: for (i = 0;i < nbatoms;i++)
! 530: xmlFree(stringMap[i]);
! 531: xmlFree(stringMap);
! 532: xmlFree(ret);
! 533: return(NULL);
! 534: }
! 535: }
! 536: #ifdef DEBUG_COMPACTION
! 537: printf("Final: %d atoms\n", nbatoms);
! 538: #endif
! 539: transitions = (int *) xmlMalloc((nbstates + 1) *
! 540: (nbatoms + 1) * sizeof(int));
! 541: if (transitions == NULL) {
! 542: xmlFree(stateRemap);
! 543: xmlFree(stringRemap);
! 544: xmlFree(stringMap);
! 545: xmlFree(ret);
! 546: return(NULL);
! 547: }
! 548: memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
! 549:
! 550: /*
! 551: * Allocate the transition table. The first entry for each
! 552: * state corresponds to the state type.
! 553: */
! 554: transdata = NULL;
! 555:
! 556: for (i = 0;i < ret->nbStates;i++) {
! 557: int stateno, atomno, targetno, prev;
! 558: xmlRegStatePtr state;
! 559: xmlRegTransPtr trans;
! 560:
! 561: stateno = stateRemap[i];
! 562: if (stateno == -1)
! 563: continue;
! 564: state = ret->states[i];
! 565:
! 566: transitions[stateno * (nbatoms + 1)] = state->type;
! 567:
! 568: for (j = 0;j < state->nbTrans;j++) {
! 569: trans = &(state->trans[j]);
! 570: if ((trans->to == -1) || (trans->atom == NULL))
! 571: continue;
! 572: atomno = stringRemap[trans->atom->no];
! 573: if ((trans->atom->data != NULL) && (transdata == NULL)) {
! 574: transdata = (void **) xmlMalloc(nbstates * nbatoms *
! 575: sizeof(void *));
! 576: if (transdata != NULL)
! 577: memset(transdata, 0,
! 578: nbstates * nbatoms * sizeof(void *));
! 579: else {
! 580: xmlRegexpErrMemory(ctxt, "compiling regexp");
! 581: break;
! 582: }
! 583: }
! 584: targetno = stateRemap[trans->to];
! 585: /*
! 586: * if the same atom can generate transitions to 2 different
! 587: * states then it means the automata is not determinist and
! 588: * the compact form can't be used !
! 589: */
! 590: prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
! 591: if (prev != 0) {
! 592: if (prev != targetno + 1) {
! 593: ret->determinist = 0;
! 594: #ifdef DEBUG_COMPACTION
! 595: printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
! 596: i, j, trans->atom->no, trans->to, atomno, targetno);
! 597: printf(" previous to is %d\n", prev);
! 598: #endif
! 599: if (transdata != NULL)
! 600: xmlFree(transdata);
! 601: xmlFree(transitions);
! 602: xmlFree(stateRemap);
! 603: xmlFree(stringRemap);
! 604: for (i = 0;i < nbatoms;i++)
! 605: xmlFree(stringMap[i]);
! 606: xmlFree(stringMap);
! 607: goto not_determ;
! 608: }
! 609: } else {
! 610: #if 0
! 611: printf("State %d trans %d: atom %d to %d : %d to %d\n",
! 612: i, j, trans->atom->no, trans->to, atomno, targetno);
! 613: #endif
! 614: transitions[stateno * (nbatoms + 1) + atomno + 1] =
! 615: targetno + 1; /* to avoid 0 */
! 616: if (transdata != NULL)
! 617: transdata[stateno * nbatoms + atomno] =
! 618: trans->atom->data;
! 619: }
! 620: }
! 621: }
! 622: ret->determinist = 1;
! 623: #ifdef DEBUG_COMPACTION
! 624: /*
! 625: * Debug
! 626: */
! 627: for (i = 0;i < nbstates;i++) {
! 628: for (j = 0;j < nbatoms + 1;j++) {
! 629: printf("%02d ", transitions[i * (nbatoms + 1) + j]);
! 630: }
! 631: printf("\n");
! 632: }
! 633: printf("\n");
! 634: #endif
! 635: /*
! 636: * Cleanup of the old data
! 637: */
! 638: if (ret->states != NULL) {
! 639: for (i = 0;i < ret->nbStates;i++)
! 640: xmlRegFreeState(ret->states[i]);
! 641: xmlFree(ret->states);
! 642: }
! 643: ret->states = NULL;
! 644: ret->nbStates = 0;
! 645: if (ret->atoms != NULL) {
! 646: for (i = 0;i < ret->nbAtoms;i++)
! 647: xmlRegFreeAtom(ret->atoms[i]);
! 648: xmlFree(ret->atoms);
! 649: }
! 650: ret->atoms = NULL;
! 651: ret->nbAtoms = 0;
! 652:
! 653: ret->compact = transitions;
! 654: ret->transdata = transdata;
! 655: ret->stringMap = stringMap;
! 656: ret->nbstrings = nbatoms;
! 657: ret->nbstates = nbstates;
! 658: xmlFree(stateRemap);
! 659: xmlFree(stringRemap);
! 660: }
! 661: not_determ:
! 662: ctxt->string = NULL;
! 663: ctxt->nbStates = 0;
! 664: ctxt->states = NULL;
! 665: ctxt->nbAtoms = 0;
! 666: ctxt->atoms = NULL;
! 667: ctxt->nbCounters = 0;
! 668: ctxt->counters = NULL;
! 669: return(ret);
! 670: }
! 671:
! 672: /**
! 673: * xmlRegNewParserCtxt:
! 674: * @string: the string to parse
! 675: *
! 676: * Allocate a new regexp parser context
! 677: *
! 678: * Returns the new context or NULL in case of error
! 679: */
! 680: static xmlRegParserCtxtPtr
! 681: xmlRegNewParserCtxt(const xmlChar *string) {
! 682: xmlRegParserCtxtPtr ret;
! 683:
! 684: ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
! 685: if (ret == NULL)
! 686: return(NULL);
! 687: memset(ret, 0, sizeof(xmlRegParserCtxt));
! 688: if (string != NULL)
! 689: ret->string = xmlStrdup(string);
! 690: ret->cur = ret->string;
! 691: ret->neg = 0;
! 692: ret->negs = 0;
! 693: ret->error = 0;
! 694: ret->determinist = -1;
! 695: return(ret);
! 696: }
! 697:
! 698: /**
! 699: * xmlRegNewRange:
! 700: * @ctxt: the regexp parser context
! 701: * @neg: is that negative
! 702: * @type: the type of range
! 703: * @start: the start codepoint
! 704: * @end: the end codepoint
! 705: *
! 706: * Allocate a new regexp range
! 707: *
! 708: * Returns the new range or NULL in case of error
! 709: */
! 710: static xmlRegRangePtr
! 711: xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
! 712: int neg, xmlRegAtomType type, int start, int end) {
! 713: xmlRegRangePtr ret;
! 714:
! 715: ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
! 716: if (ret == NULL) {
! 717: xmlRegexpErrMemory(ctxt, "allocating range");
! 718: return(NULL);
! 719: }
! 720: ret->neg = neg;
! 721: ret->type = type;
! 722: ret->start = start;
! 723: ret->end = end;
! 724: return(ret);
! 725: }
! 726:
! 727: /**
! 728: * xmlRegFreeRange:
! 729: * @range: the regexp range
! 730: *
! 731: * Free a regexp range
! 732: */
! 733: static void
! 734: xmlRegFreeRange(xmlRegRangePtr range) {
! 735: if (range == NULL)
! 736: return;
! 737:
! 738: if (range->blockName != NULL)
! 739: xmlFree(range->blockName);
! 740: xmlFree(range);
! 741: }
! 742:
! 743: /**
! 744: * xmlRegCopyRange:
! 745: * @range: the regexp range
! 746: *
! 747: * Copy a regexp range
! 748: *
! 749: * Returns the new copy or NULL in case of error.
! 750: */
! 751: static xmlRegRangePtr
! 752: xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
! 753: xmlRegRangePtr ret;
! 754:
! 755: if (range == NULL)
! 756: return(NULL);
! 757:
! 758: ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
! 759: range->end);
! 760: if (ret == NULL)
! 761: return(NULL);
! 762: if (range->blockName != NULL) {
! 763: ret->blockName = xmlStrdup(range->blockName);
! 764: if (ret->blockName == NULL) {
! 765: xmlRegexpErrMemory(ctxt, "allocating range");
! 766: xmlRegFreeRange(ret);
! 767: return(NULL);
! 768: }
! 769: }
! 770: return(ret);
! 771: }
! 772:
! 773: /**
! 774: * xmlRegNewAtom:
! 775: * @ctxt: the regexp parser context
! 776: * @type: the type of atom
! 777: *
! 778: * Allocate a new atom
! 779: *
! 780: * Returns the new atom or NULL in case of error
! 781: */
! 782: static xmlRegAtomPtr
! 783: xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
! 784: xmlRegAtomPtr ret;
! 785:
! 786: ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
! 787: if (ret == NULL) {
! 788: xmlRegexpErrMemory(ctxt, "allocating atom");
! 789: return(NULL);
! 790: }
! 791: memset(ret, 0, sizeof(xmlRegAtom));
! 792: ret->type = type;
! 793: ret->quant = XML_REGEXP_QUANT_ONCE;
! 794: ret->min = 0;
! 795: ret->max = 0;
! 796: return(ret);
! 797: }
! 798:
! 799: /**
! 800: * xmlRegFreeAtom:
! 801: * @atom: the regexp atom
! 802: *
! 803: * Free a regexp atom
! 804: */
! 805: static void
! 806: xmlRegFreeAtom(xmlRegAtomPtr atom) {
! 807: int i;
! 808:
! 809: if (atom == NULL)
! 810: return;
! 811:
! 812: for (i = 0;i < atom->nbRanges;i++)
! 813: xmlRegFreeRange(atom->ranges[i]);
! 814: if (atom->ranges != NULL)
! 815: xmlFree(atom->ranges);
! 816: if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
! 817: xmlFree(atom->valuep);
! 818: if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
! 819: xmlFree(atom->valuep2);
! 820: if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
! 821: xmlFree(atom->valuep);
! 822: xmlFree(atom);
! 823: }
! 824:
! 825: /**
! 826: * xmlRegCopyAtom:
! 827: * @ctxt: the regexp parser context
! 828: * @atom: the oiginal atom
! 829: *
! 830: * Allocate a new regexp range
! 831: *
! 832: * Returns the new atom or NULL in case of error
! 833: */
! 834: static xmlRegAtomPtr
! 835: xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
! 836: xmlRegAtomPtr ret;
! 837:
! 838: ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
! 839: if (ret == NULL) {
! 840: xmlRegexpErrMemory(ctxt, "copying atom");
! 841: return(NULL);
! 842: }
! 843: memset(ret, 0, sizeof(xmlRegAtom));
! 844: ret->type = atom->type;
! 845: ret->quant = atom->quant;
! 846: ret->min = atom->min;
! 847: ret->max = atom->max;
! 848: if (atom->nbRanges > 0) {
! 849: int i;
! 850:
! 851: ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
! 852: atom->nbRanges);
! 853: if (ret->ranges == NULL) {
! 854: xmlRegexpErrMemory(ctxt, "copying atom");
! 855: goto error;
! 856: }
! 857: for (i = 0;i < atom->nbRanges;i++) {
! 858: ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
! 859: if (ret->ranges[i] == NULL)
! 860: goto error;
! 861: ret->nbRanges = i + 1;
! 862: }
! 863: }
! 864: return(ret);
! 865:
! 866: error:
! 867: xmlRegFreeAtom(ret);
! 868: return(NULL);
! 869: }
! 870:
! 871: static xmlRegStatePtr
! 872: xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
! 873: xmlRegStatePtr ret;
! 874:
! 875: ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
! 876: if (ret == NULL) {
! 877: xmlRegexpErrMemory(ctxt, "allocating state");
! 878: return(NULL);
! 879: }
! 880: memset(ret, 0, sizeof(xmlRegState));
! 881: ret->type = XML_REGEXP_TRANS_STATE;
! 882: ret->mark = XML_REGEXP_MARK_NORMAL;
! 883: return(ret);
! 884: }
! 885:
! 886: /**
! 887: * xmlRegFreeState:
! 888: * @state: the regexp state
! 889: *
! 890: * Free a regexp state
! 891: */
! 892: static void
! 893: xmlRegFreeState(xmlRegStatePtr state) {
! 894: if (state == NULL)
! 895: return;
! 896:
! 897: if (state->trans != NULL)
! 898: xmlFree(state->trans);
! 899: if (state->transTo != NULL)
! 900: xmlFree(state->transTo);
! 901: xmlFree(state);
! 902: }
! 903:
! 904: /**
! 905: * xmlRegFreeParserCtxt:
! 906: * @ctxt: the regexp parser context
! 907: *
! 908: * Free a regexp parser context
! 909: */
! 910: static void
! 911: xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
! 912: int i;
! 913: if (ctxt == NULL)
! 914: return;
! 915:
! 916: if (ctxt->string != NULL)
! 917: xmlFree(ctxt->string);
! 918: if (ctxt->states != NULL) {
! 919: for (i = 0;i < ctxt->nbStates;i++)
! 920: xmlRegFreeState(ctxt->states[i]);
! 921: xmlFree(ctxt->states);
! 922: }
! 923: if (ctxt->atoms != NULL) {
! 924: for (i = 0;i < ctxt->nbAtoms;i++)
! 925: xmlRegFreeAtom(ctxt->atoms[i]);
! 926: xmlFree(ctxt->atoms);
! 927: }
! 928: if (ctxt->counters != NULL)
! 929: xmlFree(ctxt->counters);
! 930: xmlFree(ctxt);
! 931: }
! 932:
! 933: /************************************************************************
! 934: * *
! 935: * Display of Data structures *
! 936: * *
! 937: ************************************************************************/
! 938:
! 939: static void
! 940: xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
! 941: switch (type) {
! 942: case XML_REGEXP_EPSILON:
! 943: fprintf(output, "epsilon "); break;
! 944: case XML_REGEXP_CHARVAL:
! 945: fprintf(output, "charval "); break;
! 946: case XML_REGEXP_RANGES:
! 947: fprintf(output, "ranges "); break;
! 948: case XML_REGEXP_SUBREG:
! 949: fprintf(output, "subexpr "); break;
! 950: case XML_REGEXP_STRING:
! 951: fprintf(output, "string "); break;
! 952: case XML_REGEXP_ANYCHAR:
! 953: fprintf(output, "anychar "); break;
! 954: case XML_REGEXP_ANYSPACE:
! 955: fprintf(output, "anyspace "); break;
! 956: case XML_REGEXP_NOTSPACE:
! 957: fprintf(output, "notspace "); break;
! 958: case XML_REGEXP_INITNAME:
! 959: fprintf(output, "initname "); break;
! 960: case XML_REGEXP_NOTINITNAME:
! 961: fprintf(output, "notinitname "); break;
! 962: case XML_REGEXP_NAMECHAR:
! 963: fprintf(output, "namechar "); break;
! 964: case XML_REGEXP_NOTNAMECHAR:
! 965: fprintf(output, "notnamechar "); break;
! 966: case XML_REGEXP_DECIMAL:
! 967: fprintf(output, "decimal "); break;
! 968: case XML_REGEXP_NOTDECIMAL:
! 969: fprintf(output, "notdecimal "); break;
! 970: case XML_REGEXP_REALCHAR:
! 971: fprintf(output, "realchar "); break;
! 972: case XML_REGEXP_NOTREALCHAR:
! 973: fprintf(output, "notrealchar "); break;
! 974: case XML_REGEXP_LETTER:
! 975: fprintf(output, "LETTER "); break;
! 976: case XML_REGEXP_LETTER_UPPERCASE:
! 977: fprintf(output, "LETTER_UPPERCASE "); break;
! 978: case XML_REGEXP_LETTER_LOWERCASE:
! 979: fprintf(output, "LETTER_LOWERCASE "); break;
! 980: case XML_REGEXP_LETTER_TITLECASE:
! 981: fprintf(output, "LETTER_TITLECASE "); break;
! 982: case XML_REGEXP_LETTER_MODIFIER:
! 983: fprintf(output, "LETTER_MODIFIER "); break;
! 984: case XML_REGEXP_LETTER_OTHERS:
! 985: fprintf(output, "LETTER_OTHERS "); break;
! 986: case XML_REGEXP_MARK:
! 987: fprintf(output, "MARK "); break;
! 988: case XML_REGEXP_MARK_NONSPACING:
! 989: fprintf(output, "MARK_NONSPACING "); break;
! 990: case XML_REGEXP_MARK_SPACECOMBINING:
! 991: fprintf(output, "MARK_SPACECOMBINING "); break;
! 992: case XML_REGEXP_MARK_ENCLOSING:
! 993: fprintf(output, "MARK_ENCLOSING "); break;
! 994: case XML_REGEXP_NUMBER:
! 995: fprintf(output, "NUMBER "); break;
! 996: case XML_REGEXP_NUMBER_DECIMAL:
! 997: fprintf(output, "NUMBER_DECIMAL "); break;
! 998: case XML_REGEXP_NUMBER_LETTER:
! 999: fprintf(output, "NUMBER_LETTER "); break;
! 1000: case XML_REGEXP_NUMBER_OTHERS:
! 1001: fprintf(output, "NUMBER_OTHERS "); break;
! 1002: case XML_REGEXP_PUNCT:
! 1003: fprintf(output, "PUNCT "); break;
! 1004: case XML_REGEXP_PUNCT_CONNECTOR:
! 1005: fprintf(output, "PUNCT_CONNECTOR "); break;
! 1006: case XML_REGEXP_PUNCT_DASH:
! 1007: fprintf(output, "PUNCT_DASH "); break;
! 1008: case XML_REGEXP_PUNCT_OPEN:
! 1009: fprintf(output, "PUNCT_OPEN "); break;
! 1010: case XML_REGEXP_PUNCT_CLOSE:
! 1011: fprintf(output, "PUNCT_CLOSE "); break;
! 1012: case XML_REGEXP_PUNCT_INITQUOTE:
! 1013: fprintf(output, "PUNCT_INITQUOTE "); break;
! 1014: case XML_REGEXP_PUNCT_FINQUOTE:
! 1015: fprintf(output, "PUNCT_FINQUOTE "); break;
! 1016: case XML_REGEXP_PUNCT_OTHERS:
! 1017: fprintf(output, "PUNCT_OTHERS "); break;
! 1018: case XML_REGEXP_SEPAR:
! 1019: fprintf(output, "SEPAR "); break;
! 1020: case XML_REGEXP_SEPAR_SPACE:
! 1021: fprintf(output, "SEPAR_SPACE "); break;
! 1022: case XML_REGEXP_SEPAR_LINE:
! 1023: fprintf(output, "SEPAR_LINE "); break;
! 1024: case XML_REGEXP_SEPAR_PARA:
! 1025: fprintf(output, "SEPAR_PARA "); break;
! 1026: case XML_REGEXP_SYMBOL:
! 1027: fprintf(output, "SYMBOL "); break;
! 1028: case XML_REGEXP_SYMBOL_MATH:
! 1029: fprintf(output, "SYMBOL_MATH "); break;
! 1030: case XML_REGEXP_SYMBOL_CURRENCY:
! 1031: fprintf(output, "SYMBOL_CURRENCY "); break;
! 1032: case XML_REGEXP_SYMBOL_MODIFIER:
! 1033: fprintf(output, "SYMBOL_MODIFIER "); break;
! 1034: case XML_REGEXP_SYMBOL_OTHERS:
! 1035: fprintf(output, "SYMBOL_OTHERS "); break;
! 1036: case XML_REGEXP_OTHER:
! 1037: fprintf(output, "OTHER "); break;
! 1038: case XML_REGEXP_OTHER_CONTROL:
! 1039: fprintf(output, "OTHER_CONTROL "); break;
! 1040: case XML_REGEXP_OTHER_FORMAT:
! 1041: fprintf(output, "OTHER_FORMAT "); break;
! 1042: case XML_REGEXP_OTHER_PRIVATE:
! 1043: fprintf(output, "OTHER_PRIVATE "); break;
! 1044: case XML_REGEXP_OTHER_NA:
! 1045: fprintf(output, "OTHER_NA "); break;
! 1046: case XML_REGEXP_BLOCK_NAME:
! 1047: fprintf(output, "BLOCK "); break;
! 1048: }
! 1049: }
! 1050:
! 1051: static void
! 1052: xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
! 1053: switch (type) {
! 1054: case XML_REGEXP_QUANT_EPSILON:
! 1055: fprintf(output, "epsilon "); break;
! 1056: case XML_REGEXP_QUANT_ONCE:
! 1057: fprintf(output, "once "); break;
! 1058: case XML_REGEXP_QUANT_OPT:
! 1059: fprintf(output, "? "); break;
! 1060: case XML_REGEXP_QUANT_MULT:
! 1061: fprintf(output, "* "); break;
! 1062: case XML_REGEXP_QUANT_PLUS:
! 1063: fprintf(output, "+ "); break;
! 1064: case XML_REGEXP_QUANT_RANGE:
! 1065: fprintf(output, "range "); break;
! 1066: case XML_REGEXP_QUANT_ONCEONLY:
! 1067: fprintf(output, "onceonly "); break;
! 1068: case XML_REGEXP_QUANT_ALL:
! 1069: fprintf(output, "all "); break;
! 1070: }
! 1071: }
! 1072: static void
! 1073: xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
! 1074: fprintf(output, " range: ");
! 1075: if (range->neg)
! 1076: fprintf(output, "negative ");
! 1077: xmlRegPrintAtomType(output, range->type);
! 1078: fprintf(output, "%c - %c\n", range->start, range->end);
! 1079: }
! 1080:
! 1081: static void
! 1082: xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
! 1083: fprintf(output, " atom: ");
! 1084: if (atom == NULL) {
! 1085: fprintf(output, "NULL\n");
! 1086: return;
! 1087: }
! 1088: if (atom->neg)
! 1089: fprintf(output, "not ");
! 1090: xmlRegPrintAtomType(output, atom->type);
! 1091: xmlRegPrintQuantType(output, atom->quant);
! 1092: if (atom->quant == XML_REGEXP_QUANT_RANGE)
! 1093: fprintf(output, "%d-%d ", atom->min, atom->max);
! 1094: if (atom->type == XML_REGEXP_STRING)
! 1095: fprintf(output, "'%s' ", (char *) atom->valuep);
! 1096: if (atom->type == XML_REGEXP_CHARVAL)
! 1097: fprintf(output, "char %c\n", atom->codepoint);
! 1098: else if (atom->type == XML_REGEXP_RANGES) {
! 1099: int i;
! 1100: fprintf(output, "%d entries\n", atom->nbRanges);
! 1101: for (i = 0; i < atom->nbRanges;i++)
! 1102: xmlRegPrintRange(output, atom->ranges[i]);
! 1103: } else if (atom->type == XML_REGEXP_SUBREG) {
! 1104: fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
! 1105: } else {
! 1106: fprintf(output, "\n");
! 1107: }
! 1108: }
! 1109:
! 1110: static void
! 1111: xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
! 1112: fprintf(output, " trans: ");
! 1113: if (trans == NULL) {
! 1114: fprintf(output, "NULL\n");
! 1115: return;
! 1116: }
! 1117: if (trans->to < 0) {
! 1118: fprintf(output, "removed\n");
! 1119: return;
! 1120: }
! 1121: if (trans->nd != 0) {
! 1122: if (trans->nd == 2)
! 1123: fprintf(output, "last not determinist, ");
! 1124: else
! 1125: fprintf(output, "not determinist, ");
! 1126: }
! 1127: if (trans->counter >= 0) {
! 1128: fprintf(output, "counted %d, ", trans->counter);
! 1129: }
! 1130: if (trans->count == REGEXP_ALL_COUNTER) {
! 1131: fprintf(output, "all transition, ");
! 1132: } else if (trans->count >= 0) {
! 1133: fprintf(output, "count based %d, ", trans->count);
! 1134: }
! 1135: if (trans->atom == NULL) {
! 1136: fprintf(output, "epsilon to %d\n", trans->to);
! 1137: return;
! 1138: }
! 1139: if (trans->atom->type == XML_REGEXP_CHARVAL)
! 1140: fprintf(output, "char %c ", trans->atom->codepoint);
! 1141: fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
! 1142: }
! 1143:
! 1144: static void
! 1145: xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
! 1146: int i;
! 1147:
! 1148: fprintf(output, " state: ");
! 1149: if (state == NULL) {
! 1150: fprintf(output, "NULL\n");
! 1151: return;
! 1152: }
! 1153: if (state->type == XML_REGEXP_START_STATE)
! 1154: fprintf(output, "START ");
! 1155: if (state->type == XML_REGEXP_FINAL_STATE)
! 1156: fprintf(output, "FINAL ");
! 1157:
! 1158: fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
! 1159: for (i = 0;i < state->nbTrans; i++) {
! 1160: xmlRegPrintTrans(output, &(state->trans[i]));
! 1161: }
! 1162: }
! 1163:
! 1164: #ifdef DEBUG_REGEXP_GRAPH
! 1165: static void
! 1166: xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
! 1167: int i;
! 1168:
! 1169: fprintf(output, " ctxt: ");
! 1170: if (ctxt == NULL) {
! 1171: fprintf(output, "NULL\n");
! 1172: return;
! 1173: }
! 1174: fprintf(output, "'%s' ", ctxt->string);
! 1175: if (ctxt->error)
! 1176: fprintf(output, "error ");
! 1177: if (ctxt->neg)
! 1178: fprintf(output, "neg ");
! 1179: fprintf(output, "\n");
! 1180: fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
! 1181: for (i = 0;i < ctxt->nbAtoms; i++) {
! 1182: fprintf(output, " %02d ", i);
! 1183: xmlRegPrintAtom(output, ctxt->atoms[i]);
! 1184: }
! 1185: if (ctxt->atom != NULL) {
! 1186: fprintf(output, "current atom:\n");
! 1187: xmlRegPrintAtom(output, ctxt->atom);
! 1188: }
! 1189: fprintf(output, "%d states:", ctxt->nbStates);
! 1190: if (ctxt->start != NULL)
! 1191: fprintf(output, " start: %d", ctxt->start->no);
! 1192: if (ctxt->end != NULL)
! 1193: fprintf(output, " end: %d", ctxt->end->no);
! 1194: fprintf(output, "\n");
! 1195: for (i = 0;i < ctxt->nbStates; i++) {
! 1196: xmlRegPrintState(output, ctxt->states[i]);
! 1197: }
! 1198: fprintf(output, "%d counters:\n", ctxt->nbCounters);
! 1199: for (i = 0;i < ctxt->nbCounters; i++) {
! 1200: fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
! 1201: ctxt->counters[i].max);
! 1202: }
! 1203: }
! 1204: #endif
! 1205:
! 1206: /************************************************************************
! 1207: * *
! 1208: * Finite Automata structures manipulations *
! 1209: * *
! 1210: ************************************************************************/
! 1211:
! 1212: static void
! 1213: xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
! 1214: int neg, xmlRegAtomType type, int start, int end,
! 1215: xmlChar *blockName) {
! 1216: xmlRegRangePtr range;
! 1217:
! 1218: if (atom == NULL) {
! 1219: ERROR("add range: atom is NULL");
! 1220: return;
! 1221: }
! 1222: if (atom->type != XML_REGEXP_RANGES) {
! 1223: ERROR("add range: atom is not ranges");
! 1224: return;
! 1225: }
! 1226: if (atom->maxRanges == 0) {
! 1227: atom->maxRanges = 4;
! 1228: atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
! 1229: sizeof(xmlRegRangePtr));
! 1230: if (atom->ranges == NULL) {
! 1231: xmlRegexpErrMemory(ctxt, "adding ranges");
! 1232: atom->maxRanges = 0;
! 1233: return;
! 1234: }
! 1235: } else if (atom->nbRanges >= atom->maxRanges) {
! 1236: xmlRegRangePtr *tmp;
! 1237: atom->maxRanges *= 2;
! 1238: tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
! 1239: sizeof(xmlRegRangePtr));
! 1240: if (tmp == NULL) {
! 1241: xmlRegexpErrMemory(ctxt, "adding ranges");
! 1242: atom->maxRanges /= 2;
! 1243: return;
! 1244: }
! 1245: atom->ranges = tmp;
! 1246: }
! 1247: range = xmlRegNewRange(ctxt, neg, type, start, end);
! 1248: if (range == NULL)
! 1249: return;
! 1250: range->blockName = blockName;
! 1251: atom->ranges[atom->nbRanges++] = range;
! 1252:
! 1253: }
! 1254:
! 1255: static int
! 1256: xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
! 1257: if (ctxt->maxCounters == 0) {
! 1258: ctxt->maxCounters = 4;
! 1259: ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
! 1260: sizeof(xmlRegCounter));
! 1261: if (ctxt->counters == NULL) {
! 1262: xmlRegexpErrMemory(ctxt, "allocating counter");
! 1263: ctxt->maxCounters = 0;
! 1264: return(-1);
! 1265: }
! 1266: } else if (ctxt->nbCounters >= ctxt->maxCounters) {
! 1267: xmlRegCounter *tmp;
! 1268: ctxt->maxCounters *= 2;
! 1269: tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
! 1270: sizeof(xmlRegCounter));
! 1271: if (tmp == NULL) {
! 1272: xmlRegexpErrMemory(ctxt, "allocating counter");
! 1273: ctxt->maxCounters /= 2;
! 1274: return(-1);
! 1275: }
! 1276: ctxt->counters = tmp;
! 1277: }
! 1278: ctxt->counters[ctxt->nbCounters].min = -1;
! 1279: ctxt->counters[ctxt->nbCounters].max = -1;
! 1280: return(ctxt->nbCounters++);
! 1281: }
! 1282:
! 1283: static int
! 1284: xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
! 1285: if (atom == NULL) {
! 1286: ERROR("atom push: atom is NULL");
! 1287: return(-1);
! 1288: }
! 1289: if (ctxt->maxAtoms == 0) {
! 1290: ctxt->maxAtoms = 4;
! 1291: ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
! 1292: sizeof(xmlRegAtomPtr));
! 1293: if (ctxt->atoms == NULL) {
! 1294: xmlRegexpErrMemory(ctxt, "pushing atom");
! 1295: ctxt->maxAtoms = 0;
! 1296: return(-1);
! 1297: }
! 1298: } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
! 1299: xmlRegAtomPtr *tmp;
! 1300: ctxt->maxAtoms *= 2;
! 1301: tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
! 1302: sizeof(xmlRegAtomPtr));
! 1303: if (tmp == NULL) {
! 1304: xmlRegexpErrMemory(ctxt, "allocating counter");
! 1305: ctxt->maxAtoms /= 2;
! 1306: return(-1);
! 1307: }
! 1308: ctxt->atoms = tmp;
! 1309: }
! 1310: atom->no = ctxt->nbAtoms;
! 1311: ctxt->atoms[ctxt->nbAtoms++] = atom;
! 1312: return(0);
! 1313: }
! 1314:
! 1315: static void
! 1316: xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
! 1317: int from) {
! 1318: if (target->maxTransTo == 0) {
! 1319: target->maxTransTo = 8;
! 1320: target->transTo = (int *) xmlMalloc(target->maxTransTo *
! 1321: sizeof(int));
! 1322: if (target->transTo == NULL) {
! 1323: xmlRegexpErrMemory(ctxt, "adding transition");
! 1324: target->maxTransTo = 0;
! 1325: return;
! 1326: }
! 1327: } else if (target->nbTransTo >= target->maxTransTo) {
! 1328: int *tmp;
! 1329: target->maxTransTo *= 2;
! 1330: tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
! 1331: sizeof(int));
! 1332: if (tmp == NULL) {
! 1333: xmlRegexpErrMemory(ctxt, "adding transition");
! 1334: target->maxTransTo /= 2;
! 1335: return;
! 1336: }
! 1337: target->transTo = tmp;
! 1338: }
! 1339: target->transTo[target->nbTransTo] = from;
! 1340: target->nbTransTo++;
! 1341: }
! 1342:
! 1343: static void
! 1344: xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
! 1345: xmlRegAtomPtr atom, xmlRegStatePtr target,
! 1346: int counter, int count) {
! 1347:
! 1348: int nrtrans;
! 1349:
! 1350: if (state == NULL) {
! 1351: ERROR("add state: state is NULL");
! 1352: return;
! 1353: }
! 1354: if (target == NULL) {
! 1355: ERROR("add state: target is NULL");
! 1356: return;
! 1357: }
! 1358: /*
! 1359: * Other routines follow the philosophy 'When in doubt, add a transition'
! 1360: * so we check here whether such a transition is already present and, if
! 1361: * so, silently ignore this request.
! 1362: */
! 1363:
! 1364: for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
! 1365: xmlRegTransPtr trans = &(state->trans[nrtrans]);
! 1366: if ((trans->atom == atom) &&
! 1367: (trans->to == target->no) &&
! 1368: (trans->counter == counter) &&
! 1369: (trans->count == count)) {
! 1370: #ifdef DEBUG_REGEXP_GRAPH
! 1371: printf("Ignoring duplicate transition from %d to %d\n",
! 1372: state->no, target->no);
! 1373: #endif
! 1374: return;
! 1375: }
! 1376: }
! 1377:
! 1378: if (state->maxTrans == 0) {
! 1379: state->maxTrans = 8;
! 1380: state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
! 1381: sizeof(xmlRegTrans));
! 1382: if (state->trans == NULL) {
! 1383: xmlRegexpErrMemory(ctxt, "adding transition");
! 1384: state->maxTrans = 0;
! 1385: return;
! 1386: }
! 1387: } else if (state->nbTrans >= state->maxTrans) {
! 1388: xmlRegTrans *tmp;
! 1389: state->maxTrans *= 2;
! 1390: tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
! 1391: sizeof(xmlRegTrans));
! 1392: if (tmp == NULL) {
! 1393: xmlRegexpErrMemory(ctxt, "adding transition");
! 1394: state->maxTrans /= 2;
! 1395: return;
! 1396: }
! 1397: state->trans = tmp;
! 1398: }
! 1399: #ifdef DEBUG_REGEXP_GRAPH
! 1400: printf("Add trans from %d to %d ", state->no, target->no);
! 1401: if (count == REGEXP_ALL_COUNTER)
! 1402: printf("all transition\n");
! 1403: else if (count >= 0)
! 1404: printf("count based %d\n", count);
! 1405: else if (counter >= 0)
! 1406: printf("counted %d\n", counter);
! 1407: else if (atom == NULL)
! 1408: printf("epsilon transition\n");
! 1409: else if (atom != NULL)
! 1410: xmlRegPrintAtom(stdout, atom);
! 1411: #endif
! 1412:
! 1413: state->trans[state->nbTrans].atom = atom;
! 1414: state->trans[state->nbTrans].to = target->no;
! 1415: state->trans[state->nbTrans].counter = counter;
! 1416: state->trans[state->nbTrans].count = count;
! 1417: state->trans[state->nbTrans].nd = 0;
! 1418: state->nbTrans++;
! 1419: xmlRegStateAddTransTo(ctxt, target, state->no);
! 1420: }
! 1421:
! 1422: static int
! 1423: xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
! 1424: if (state == NULL) return(-1);
! 1425: if (ctxt->maxStates == 0) {
! 1426: ctxt->maxStates = 4;
! 1427: ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
! 1428: sizeof(xmlRegStatePtr));
! 1429: if (ctxt->states == NULL) {
! 1430: xmlRegexpErrMemory(ctxt, "adding state");
! 1431: ctxt->maxStates = 0;
! 1432: return(-1);
! 1433: }
! 1434: } else if (ctxt->nbStates >= ctxt->maxStates) {
! 1435: xmlRegStatePtr *tmp;
! 1436: ctxt->maxStates *= 2;
! 1437: tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
! 1438: sizeof(xmlRegStatePtr));
! 1439: if (tmp == NULL) {
! 1440: xmlRegexpErrMemory(ctxt, "adding state");
! 1441: ctxt->maxStates /= 2;
! 1442: return(-1);
! 1443: }
! 1444: ctxt->states = tmp;
! 1445: }
! 1446: state->no = ctxt->nbStates;
! 1447: ctxt->states[ctxt->nbStates++] = state;
! 1448: return(0);
! 1449: }
! 1450:
! 1451: /**
! 1452: * xmlFAGenerateAllTransition:
! 1453: * @ctxt: a regexp parser context
! 1454: * @from: the from state
! 1455: * @to: the target state or NULL for building a new one
! 1456: * @lax:
! 1457: *
! 1458: */
! 1459: static void
! 1460: xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
! 1461: xmlRegStatePtr from, xmlRegStatePtr to,
! 1462: int lax) {
! 1463: if (to == NULL) {
! 1464: to = xmlRegNewState(ctxt);
! 1465: xmlRegStatePush(ctxt, to);
! 1466: ctxt->state = to;
! 1467: }
! 1468: if (lax)
! 1469: xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
! 1470: else
! 1471: xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
! 1472: }
! 1473:
! 1474: /**
! 1475: * xmlFAGenerateEpsilonTransition:
! 1476: * @ctxt: a regexp parser context
! 1477: * @from: the from state
! 1478: * @to: the target state or NULL for building a new one
! 1479: *
! 1480: */
! 1481: static void
! 1482: xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
! 1483: xmlRegStatePtr from, xmlRegStatePtr to) {
! 1484: if (to == NULL) {
! 1485: to = xmlRegNewState(ctxt);
! 1486: xmlRegStatePush(ctxt, to);
! 1487: ctxt->state = to;
! 1488: }
! 1489: xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
! 1490: }
! 1491:
! 1492: /**
! 1493: * xmlFAGenerateCountedEpsilonTransition:
! 1494: * @ctxt: a regexp parser context
! 1495: * @from: the from state
! 1496: * @to: the target state or NULL for building a new one
! 1497: * counter: the counter for that transition
! 1498: *
! 1499: */
! 1500: static void
! 1501: xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
! 1502: xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
! 1503: if (to == NULL) {
! 1504: to = xmlRegNewState(ctxt);
! 1505: xmlRegStatePush(ctxt, to);
! 1506: ctxt->state = to;
! 1507: }
! 1508: xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
! 1509: }
! 1510:
! 1511: /**
! 1512: * xmlFAGenerateCountedTransition:
! 1513: * @ctxt: a regexp parser context
! 1514: * @from: the from state
! 1515: * @to: the target state or NULL for building a new one
! 1516: * counter: the counter for that transition
! 1517: *
! 1518: */
! 1519: static void
! 1520: xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
! 1521: xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
! 1522: if (to == NULL) {
! 1523: to = xmlRegNewState(ctxt);
! 1524: xmlRegStatePush(ctxt, to);
! 1525: ctxt->state = to;
! 1526: }
! 1527: xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
! 1528: }
! 1529:
! 1530: /**
! 1531: * xmlFAGenerateTransitions:
! 1532: * @ctxt: a regexp parser context
! 1533: * @from: the from state
! 1534: * @to: the target state or NULL for building a new one
! 1535: * @atom: the atom generating the transition
! 1536: *
! 1537: * Returns 0 if success and -1 in case of error.
! 1538: */
! 1539: static int
! 1540: xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
! 1541: xmlRegStatePtr to, xmlRegAtomPtr atom) {
! 1542: xmlRegStatePtr end;
! 1543:
! 1544: if (atom == NULL) {
! 1545: ERROR("genrate transition: atom == NULL");
! 1546: return(-1);
! 1547: }
! 1548: if (atom->type == XML_REGEXP_SUBREG) {
! 1549: /*
! 1550: * this is a subexpression handling one should not need to
! 1551: * create a new node except for XML_REGEXP_QUANT_RANGE.
! 1552: */
! 1553: if (xmlRegAtomPush(ctxt, atom) < 0) {
! 1554: return(-1);
! 1555: }
! 1556: if ((to != NULL) && (atom->stop != to) &&
! 1557: (atom->quant != XML_REGEXP_QUANT_RANGE)) {
! 1558: /*
! 1559: * Generate an epsilon transition to link to the target
! 1560: */
! 1561: xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
! 1562: #ifdef DV
! 1563: } else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
! 1564: (atom->quant != XML_REGEXP_QUANT_ONCE)) {
! 1565: to = xmlRegNewState(ctxt);
! 1566: xmlRegStatePush(ctxt, to);
! 1567: ctxt->state = to;
! 1568: xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
! 1569: #endif
! 1570: }
! 1571: switch (atom->quant) {
! 1572: case XML_REGEXP_QUANT_OPT:
! 1573: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1574: /*
! 1575: * transition done to the state after end of atom.
! 1576: * 1. set transition from atom start to new state
! 1577: * 2. set transition from atom end to this state.
! 1578: */
! 1579: if (to == NULL) {
! 1580: xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
! 1581: xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
! 1582: ctxt->state);
! 1583: } else {
! 1584: xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
! 1585: }
! 1586: break;
! 1587: case XML_REGEXP_QUANT_MULT:
! 1588: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1589: xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
! 1590: xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
! 1591: break;
! 1592: case XML_REGEXP_QUANT_PLUS:
! 1593: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1594: xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
! 1595: break;
! 1596: case XML_REGEXP_QUANT_RANGE: {
! 1597: int counter;
! 1598: xmlRegStatePtr inter, newstate;
! 1599:
! 1600: /*
! 1601: * create the final state now if needed
! 1602: */
! 1603: if (to != NULL) {
! 1604: newstate = to;
! 1605: } else {
! 1606: newstate = xmlRegNewState(ctxt);
! 1607: xmlRegStatePush(ctxt, newstate);
! 1608: }
! 1609:
! 1610: /*
! 1611: * The principle here is to use counted transition
! 1612: * to avoid explosion in the number of states in the
! 1613: * graph. This is clearly more complex but should not
! 1614: * be exploitable at runtime.
! 1615: */
! 1616: if ((atom->min == 0) && (atom->start0 == NULL)) {
! 1617: xmlRegAtomPtr copy;
! 1618: /*
! 1619: * duplicate a transition based on atom to count next
! 1620: * occurences after 1. We cannot loop to atom->start
! 1621: * directly because we need an epsilon transition to
! 1622: * newstate.
! 1623: */
! 1624: /* ???? For some reason it seems we never reach that
! 1625: case, I suppose this got optimized out before when
! 1626: building the automata */
! 1627: copy = xmlRegCopyAtom(ctxt, atom);
! 1628: if (copy == NULL)
! 1629: return(-1);
! 1630: copy->quant = XML_REGEXP_QUANT_ONCE;
! 1631: copy->min = 0;
! 1632: copy->max = 0;
! 1633:
! 1634: if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
! 1635: < 0)
! 1636: return(-1);
! 1637: inter = ctxt->state;
! 1638: counter = xmlRegGetCounter(ctxt);
! 1639: ctxt->counters[counter].min = atom->min - 1;
! 1640: ctxt->counters[counter].max = atom->max - 1;
! 1641: /* count the number of times we see it again */
! 1642: xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
! 1643: atom->stop, counter);
! 1644: /* allow a way out based on the count */
! 1645: xmlFAGenerateCountedTransition(ctxt, inter,
! 1646: newstate, counter);
! 1647: /* and also allow a direct exit for 0 */
! 1648: xmlFAGenerateEpsilonTransition(ctxt, atom->start,
! 1649: newstate);
! 1650: } else {
! 1651: /*
! 1652: * either we need the atom at least once or there
! 1653: * is an atom->start0 allowing to easilly plug the
! 1654: * epsilon transition.
! 1655: */
! 1656: counter = xmlRegGetCounter(ctxt);
! 1657: ctxt->counters[counter].min = atom->min - 1;
! 1658: ctxt->counters[counter].max = atom->max - 1;
! 1659: /* count the number of times we see it again */
! 1660: xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
! 1661: atom->start, counter);
! 1662: /* allow a way out based on the count */
! 1663: xmlFAGenerateCountedTransition(ctxt, atom->stop,
! 1664: newstate, counter);
! 1665: /* and if needed allow a direct exit for 0 */
! 1666: if (atom->min == 0)
! 1667: xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
! 1668: newstate);
! 1669:
! 1670: }
! 1671: atom->min = 0;
! 1672: atom->max = 0;
! 1673: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1674: ctxt->state = newstate;
! 1675: }
! 1676: default:
! 1677: break;
! 1678: }
! 1679: return(0);
! 1680: }
! 1681: if ((atom->min == 0) && (atom->max == 0) &&
! 1682: (atom->quant == XML_REGEXP_QUANT_RANGE)) {
! 1683: /*
! 1684: * we can discard the atom and generate an epsilon transition instead
! 1685: */
! 1686: if (to == NULL) {
! 1687: to = xmlRegNewState(ctxt);
! 1688: if (to != NULL)
! 1689: xmlRegStatePush(ctxt, to);
! 1690: else {
! 1691: return(-1);
! 1692: }
! 1693: }
! 1694: xmlFAGenerateEpsilonTransition(ctxt, from, to);
! 1695: ctxt->state = to;
! 1696: xmlRegFreeAtom(atom);
! 1697: return(0);
! 1698: }
! 1699: if (to == NULL) {
! 1700: to = xmlRegNewState(ctxt);
! 1701: if (to != NULL)
! 1702: xmlRegStatePush(ctxt, to);
! 1703: else {
! 1704: return(-1);
! 1705: }
! 1706: }
! 1707: end = to;
! 1708: if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
! 1709: (atom->quant == XML_REGEXP_QUANT_PLUS)) {
! 1710: /*
! 1711: * Do not pollute the target state by adding transitions from
! 1712: * it as it is likely to be the shared target of multiple branches.
! 1713: * So isolate with an epsilon transition.
! 1714: */
! 1715: xmlRegStatePtr tmp;
! 1716:
! 1717: tmp = xmlRegNewState(ctxt);
! 1718: if (tmp != NULL)
! 1719: xmlRegStatePush(ctxt, tmp);
! 1720: else {
! 1721: return(-1);
! 1722: }
! 1723: xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
! 1724: to = tmp;
! 1725: }
! 1726: if (xmlRegAtomPush(ctxt, atom) < 0) {
! 1727: return(-1);
! 1728: }
! 1729: xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
! 1730: ctxt->state = end;
! 1731: switch (atom->quant) {
! 1732: case XML_REGEXP_QUANT_OPT:
! 1733: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1734: xmlFAGenerateEpsilonTransition(ctxt, from, to);
! 1735: break;
! 1736: case XML_REGEXP_QUANT_MULT:
! 1737: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1738: xmlFAGenerateEpsilonTransition(ctxt, from, to);
! 1739: xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
! 1740: break;
! 1741: case XML_REGEXP_QUANT_PLUS:
! 1742: atom->quant = XML_REGEXP_QUANT_ONCE;
! 1743: xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
! 1744: break;
! 1745: case XML_REGEXP_QUANT_RANGE:
! 1746: #if DV_test
! 1747: if (atom->min == 0) {
! 1748: xmlFAGenerateEpsilonTransition(ctxt, from, to);
! 1749: }
! 1750: #endif
! 1751: break;
! 1752: default:
! 1753: break;
! 1754: }
! 1755: return(0);
! 1756: }
! 1757:
! 1758: /**
! 1759: * xmlFAReduceEpsilonTransitions:
! 1760: * @ctxt: a regexp parser context
! 1761: * @fromnr: the from state
! 1762: * @tonr: the to state
! 1763: * @counter: should that transition be associated to a counted
! 1764: *
! 1765: */
! 1766: static void
! 1767: xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
! 1768: int tonr, int counter) {
! 1769: int transnr;
! 1770: xmlRegStatePtr from;
! 1771: xmlRegStatePtr to;
! 1772:
! 1773: #ifdef DEBUG_REGEXP_GRAPH
! 1774: printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
! 1775: #endif
! 1776: from = ctxt->states[fromnr];
! 1777: if (from == NULL)
! 1778: return;
! 1779: to = ctxt->states[tonr];
! 1780: if (to == NULL)
! 1781: return;
! 1782: if ((to->mark == XML_REGEXP_MARK_START) ||
! 1783: (to->mark == XML_REGEXP_MARK_VISITED))
! 1784: return;
! 1785:
! 1786: to->mark = XML_REGEXP_MARK_VISITED;
! 1787: if (to->type == XML_REGEXP_FINAL_STATE) {
! 1788: #ifdef DEBUG_REGEXP_GRAPH
! 1789: printf("State %d is final, so %d becomes final\n", tonr, fromnr);
! 1790: #endif
! 1791: from->type = XML_REGEXP_FINAL_STATE;
! 1792: }
! 1793: for (transnr = 0;transnr < to->nbTrans;transnr++) {
! 1794: if (to->trans[transnr].to < 0)
! 1795: continue;
! 1796: if (to->trans[transnr].atom == NULL) {
! 1797: /*
! 1798: * Don't remove counted transitions
! 1799: * Don't loop either
! 1800: */
! 1801: if (to->trans[transnr].to != fromnr) {
! 1802: if (to->trans[transnr].count >= 0) {
! 1803: int newto = to->trans[transnr].to;
! 1804:
! 1805: xmlRegStateAddTrans(ctxt, from, NULL,
! 1806: ctxt->states[newto],
! 1807: -1, to->trans[transnr].count);
! 1808: } else {
! 1809: #ifdef DEBUG_REGEXP_GRAPH
! 1810: printf("Found epsilon trans %d from %d to %d\n",
! 1811: transnr, tonr, to->trans[transnr].to);
! 1812: #endif
! 1813: if (to->trans[transnr].counter >= 0) {
! 1814: xmlFAReduceEpsilonTransitions(ctxt, fromnr,
! 1815: to->trans[transnr].to,
! 1816: to->trans[transnr].counter);
! 1817: } else {
! 1818: xmlFAReduceEpsilonTransitions(ctxt, fromnr,
! 1819: to->trans[transnr].to,
! 1820: counter);
! 1821: }
! 1822: }
! 1823: }
! 1824: } else {
! 1825: int newto = to->trans[transnr].to;
! 1826:
! 1827: if (to->trans[transnr].counter >= 0) {
! 1828: xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
! 1829: ctxt->states[newto],
! 1830: to->trans[transnr].counter, -1);
! 1831: } else {
! 1832: xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
! 1833: ctxt->states[newto], counter, -1);
! 1834: }
! 1835: }
! 1836: }
! 1837: to->mark = XML_REGEXP_MARK_NORMAL;
! 1838: }
! 1839:
! 1840: /**
! 1841: * xmlFAEliminateSimpleEpsilonTransitions:
! 1842: * @ctxt: a regexp parser context
! 1843: *
! 1844: * Eliminating general epsilon transitions can get costly in the general
! 1845: * algorithm due to the large amount of generated new transitions and
! 1846: * associated comparisons. However for simple epsilon transition used just
! 1847: * to separate building blocks when generating the automata this can be
! 1848: * reduced to state elimination:
! 1849: * - if there exists an epsilon from X to Y
! 1850: * - if there is no other transition from X
! 1851: * then X and Y are semantically equivalent and X can be eliminated
! 1852: * If X is the start state then make Y the start state, else replace the
! 1853: * target of all transitions to X by transitions to Y.
! 1854: */
! 1855: static void
! 1856: xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
! 1857: int statenr, i, j, newto;
! 1858: xmlRegStatePtr state, tmp;
! 1859:
! 1860: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 1861: state = ctxt->states[statenr];
! 1862: if (state == NULL)
! 1863: continue;
! 1864: if (state->nbTrans != 1)
! 1865: continue;
! 1866: if (state->type == XML_REGEXP_UNREACH_STATE)
! 1867: continue;
! 1868: /* is the only transition out a basic transition */
! 1869: if ((state->trans[0].atom == NULL) &&
! 1870: (state->trans[0].to >= 0) &&
! 1871: (state->trans[0].to != statenr) &&
! 1872: (state->trans[0].counter < 0) &&
! 1873: (state->trans[0].count < 0)) {
! 1874: newto = state->trans[0].to;
! 1875:
! 1876: if (state->type == XML_REGEXP_START_STATE) {
! 1877: #ifdef DEBUG_REGEXP_GRAPH
! 1878: printf("Found simple epsilon trans from start %d to %d\n",
! 1879: statenr, newto);
! 1880: #endif
! 1881: } else {
! 1882: #ifdef DEBUG_REGEXP_GRAPH
! 1883: printf("Found simple epsilon trans from %d to %d\n",
! 1884: statenr, newto);
! 1885: #endif
! 1886: for (i = 0;i < state->nbTransTo;i++) {
! 1887: tmp = ctxt->states[state->transTo[i]];
! 1888: for (j = 0;j < tmp->nbTrans;j++) {
! 1889: if (tmp->trans[j].to == statenr) {
! 1890: #ifdef DEBUG_REGEXP_GRAPH
! 1891: printf("Changed transition %d on %d to go to %d\n",
! 1892: j, tmp->no, newto);
! 1893: #endif
! 1894: tmp->trans[j].to = -1;
! 1895: xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
! 1896: ctxt->states[newto],
! 1897: tmp->trans[j].counter,
! 1898: tmp->trans[j].count);
! 1899: }
! 1900: }
! 1901: }
! 1902: if (state->type == XML_REGEXP_FINAL_STATE)
! 1903: ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
! 1904: /* eliminate the transition completely */
! 1905: state->nbTrans = 0;
! 1906:
! 1907: state->type = XML_REGEXP_UNREACH_STATE;
! 1908:
! 1909: }
! 1910:
! 1911: }
! 1912: }
! 1913: }
! 1914: /**
! 1915: * xmlFAEliminateEpsilonTransitions:
! 1916: * @ctxt: a regexp parser context
! 1917: *
! 1918: */
! 1919: static void
! 1920: xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
! 1921: int statenr, transnr;
! 1922: xmlRegStatePtr state;
! 1923: int has_epsilon;
! 1924:
! 1925: if (ctxt->states == NULL) return;
! 1926:
! 1927: /*
! 1928: * Eliminate simple epsilon transition and the associated unreachable
! 1929: * states.
! 1930: */
! 1931: xmlFAEliminateSimpleEpsilonTransitions(ctxt);
! 1932: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 1933: state = ctxt->states[statenr];
! 1934: if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
! 1935: #ifdef DEBUG_REGEXP_GRAPH
! 1936: printf("Removed unreachable state %d\n", statenr);
! 1937: #endif
! 1938: xmlRegFreeState(state);
! 1939: ctxt->states[statenr] = NULL;
! 1940: }
! 1941: }
! 1942:
! 1943: has_epsilon = 0;
! 1944:
! 1945: /*
! 1946: * Build the completed transitions bypassing the epsilons
! 1947: * Use a marking algorithm to avoid loops
! 1948: * Mark sink states too.
! 1949: * Process from the latests states backward to the start when
! 1950: * there is long cascading epsilon chains this minimize the
! 1951: * recursions and transition compares when adding the new ones
! 1952: */
! 1953: for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
! 1954: state = ctxt->states[statenr];
! 1955: if (state == NULL)
! 1956: continue;
! 1957: if ((state->nbTrans == 0) &&
! 1958: (state->type != XML_REGEXP_FINAL_STATE)) {
! 1959: state->type = XML_REGEXP_SINK_STATE;
! 1960: }
! 1961: for (transnr = 0;transnr < state->nbTrans;transnr++) {
! 1962: if ((state->trans[transnr].atom == NULL) &&
! 1963: (state->trans[transnr].to >= 0)) {
! 1964: if (state->trans[transnr].to == statenr) {
! 1965: state->trans[transnr].to = -1;
! 1966: #ifdef DEBUG_REGEXP_GRAPH
! 1967: printf("Removed loopback epsilon trans %d on %d\n",
! 1968: transnr, statenr);
! 1969: #endif
! 1970: } else if (state->trans[transnr].count < 0) {
! 1971: int newto = state->trans[transnr].to;
! 1972:
! 1973: #ifdef DEBUG_REGEXP_GRAPH
! 1974: printf("Found epsilon trans %d from %d to %d\n",
! 1975: transnr, statenr, newto);
! 1976: #endif
! 1977: has_epsilon = 1;
! 1978: state->trans[transnr].to = -2;
! 1979: state->mark = XML_REGEXP_MARK_START;
! 1980: xmlFAReduceEpsilonTransitions(ctxt, statenr,
! 1981: newto, state->trans[transnr].counter);
! 1982: state->mark = XML_REGEXP_MARK_NORMAL;
! 1983: #ifdef DEBUG_REGEXP_GRAPH
! 1984: } else {
! 1985: printf("Found counted transition %d on %d\n",
! 1986: transnr, statenr);
! 1987: #endif
! 1988: }
! 1989: }
! 1990: }
! 1991: }
! 1992: /*
! 1993: * Eliminate the epsilon transitions
! 1994: */
! 1995: if (has_epsilon) {
! 1996: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 1997: state = ctxt->states[statenr];
! 1998: if (state == NULL)
! 1999: continue;
! 2000: for (transnr = 0;transnr < state->nbTrans;transnr++) {
! 2001: xmlRegTransPtr trans = &(state->trans[transnr]);
! 2002: if ((trans->atom == NULL) &&
! 2003: (trans->count < 0) &&
! 2004: (trans->to >= 0)) {
! 2005: trans->to = -1;
! 2006: }
! 2007: }
! 2008: }
! 2009: }
! 2010:
! 2011: /*
! 2012: * Use this pass to detect unreachable states too
! 2013: */
! 2014: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 2015: state = ctxt->states[statenr];
! 2016: if (state != NULL)
! 2017: state->reached = XML_REGEXP_MARK_NORMAL;
! 2018: }
! 2019: state = ctxt->states[0];
! 2020: if (state != NULL)
! 2021: state->reached = XML_REGEXP_MARK_START;
! 2022: while (state != NULL) {
! 2023: xmlRegStatePtr target = NULL;
! 2024: state->reached = XML_REGEXP_MARK_VISITED;
! 2025: /*
! 2026: * Mark all states reachable from the current reachable state
! 2027: */
! 2028: for (transnr = 0;transnr < state->nbTrans;transnr++) {
! 2029: if ((state->trans[transnr].to >= 0) &&
! 2030: ((state->trans[transnr].atom != NULL) ||
! 2031: (state->trans[transnr].count >= 0))) {
! 2032: int newto = state->trans[transnr].to;
! 2033:
! 2034: if (ctxt->states[newto] == NULL)
! 2035: continue;
! 2036: if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
! 2037: ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
! 2038: target = ctxt->states[newto];
! 2039: }
! 2040: }
! 2041: }
! 2042:
! 2043: /*
! 2044: * find the next accessible state not explored
! 2045: */
! 2046: if (target == NULL) {
! 2047: for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
! 2048: state = ctxt->states[statenr];
! 2049: if ((state != NULL) && (state->reached ==
! 2050: XML_REGEXP_MARK_START)) {
! 2051: target = state;
! 2052: break;
! 2053: }
! 2054: }
! 2055: }
! 2056: state = target;
! 2057: }
! 2058: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 2059: state = ctxt->states[statenr];
! 2060: if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
! 2061: #ifdef DEBUG_REGEXP_GRAPH
! 2062: printf("Removed unreachable state %d\n", statenr);
! 2063: #endif
! 2064: xmlRegFreeState(state);
! 2065: ctxt->states[statenr] = NULL;
! 2066: }
! 2067: }
! 2068:
! 2069: }
! 2070:
! 2071: static int
! 2072: xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
! 2073: int ret = 0;
! 2074:
! 2075: if ((range1->type == XML_REGEXP_RANGES) ||
! 2076: (range2->type == XML_REGEXP_RANGES) ||
! 2077: (range2->type == XML_REGEXP_SUBREG) ||
! 2078: (range1->type == XML_REGEXP_SUBREG) ||
! 2079: (range1->type == XML_REGEXP_STRING) ||
! 2080: (range2->type == XML_REGEXP_STRING))
! 2081: return(-1);
! 2082:
! 2083: /* put them in order */
! 2084: if (range1->type > range2->type) {
! 2085: xmlRegRangePtr tmp;
! 2086:
! 2087: tmp = range1;
! 2088: range1 = range2;
! 2089: range2 = tmp;
! 2090: }
! 2091: if ((range1->type == XML_REGEXP_ANYCHAR) ||
! 2092: (range2->type == XML_REGEXP_ANYCHAR)) {
! 2093: ret = 1;
! 2094: } else if ((range1->type == XML_REGEXP_EPSILON) ||
! 2095: (range2->type == XML_REGEXP_EPSILON)) {
! 2096: return(0);
! 2097: } else if (range1->type == range2->type) {
! 2098: if (range1->type != XML_REGEXP_CHARVAL)
! 2099: ret = 1;
! 2100: else if ((range1->end < range2->start) ||
! 2101: (range2->end < range1->start))
! 2102: ret = 0;
! 2103: else
! 2104: ret = 1;
! 2105: } else if (range1->type == XML_REGEXP_CHARVAL) {
! 2106: int codepoint;
! 2107: int neg = 0;
! 2108:
! 2109: /*
! 2110: * just check all codepoints in the range for acceptance,
! 2111: * this is usually way cheaper since done only once at
! 2112: * compilation than testing over and over at runtime or
! 2113: * pushing too many states when evaluating.
! 2114: */
! 2115: if (((range1->neg == 0) && (range2->neg != 0)) ||
! 2116: ((range1->neg != 0) && (range2->neg == 0)))
! 2117: neg = 1;
! 2118:
! 2119: for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
! 2120: ret = xmlRegCheckCharacterRange(range2->type, codepoint,
! 2121: 0, range2->start, range2->end,
! 2122: range2->blockName);
! 2123: if (ret < 0)
! 2124: return(-1);
! 2125: if (((neg == 1) && (ret == 0)) ||
! 2126: ((neg == 0) && (ret == 1)))
! 2127: return(1);
! 2128: }
! 2129: return(0);
! 2130: } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
! 2131: (range2->type == XML_REGEXP_BLOCK_NAME)) {
! 2132: if (range1->type == range2->type) {
! 2133: ret = xmlStrEqual(range1->blockName, range2->blockName);
! 2134: } else {
! 2135: /*
! 2136: * comparing a block range with anything else is way
! 2137: * too costly, and maintining the table is like too much
! 2138: * memory too, so let's force the automata to save state
! 2139: * here.
! 2140: */
! 2141: return(1);
! 2142: }
! 2143: } else if ((range1->type < XML_REGEXP_LETTER) ||
! 2144: (range2->type < XML_REGEXP_LETTER)) {
! 2145: if ((range1->type == XML_REGEXP_ANYSPACE) &&
! 2146: (range2->type == XML_REGEXP_NOTSPACE))
! 2147: ret = 0;
! 2148: else if ((range1->type == XML_REGEXP_INITNAME) &&
! 2149: (range2->type == XML_REGEXP_NOTINITNAME))
! 2150: ret = 0;
! 2151: else if ((range1->type == XML_REGEXP_NAMECHAR) &&
! 2152: (range2->type == XML_REGEXP_NOTNAMECHAR))
! 2153: ret = 0;
! 2154: else if ((range1->type == XML_REGEXP_DECIMAL) &&
! 2155: (range2->type == XML_REGEXP_NOTDECIMAL))
! 2156: ret = 0;
! 2157: else if ((range1->type == XML_REGEXP_REALCHAR) &&
! 2158: (range2->type == XML_REGEXP_NOTREALCHAR))
! 2159: ret = 0;
! 2160: else {
! 2161: /* same thing to limit complexity */
! 2162: return(1);
! 2163: }
! 2164: } else {
! 2165: ret = 0;
! 2166: /* range1->type < range2->type here */
! 2167: switch (range1->type) {
! 2168: case XML_REGEXP_LETTER:
! 2169: /* all disjoint except in the subgroups */
! 2170: if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
! 2171: (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
! 2172: (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
! 2173: (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
! 2174: (range2->type == XML_REGEXP_LETTER_OTHERS))
! 2175: ret = 1;
! 2176: break;
! 2177: case XML_REGEXP_MARK:
! 2178: if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
! 2179: (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
! 2180: (range2->type == XML_REGEXP_MARK_ENCLOSING))
! 2181: ret = 1;
! 2182: break;
! 2183: case XML_REGEXP_NUMBER:
! 2184: if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
! 2185: (range2->type == XML_REGEXP_NUMBER_LETTER) ||
! 2186: (range2->type == XML_REGEXP_NUMBER_OTHERS))
! 2187: ret = 1;
! 2188: break;
! 2189: case XML_REGEXP_PUNCT:
! 2190: if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
! 2191: (range2->type == XML_REGEXP_PUNCT_DASH) ||
! 2192: (range2->type == XML_REGEXP_PUNCT_OPEN) ||
! 2193: (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
! 2194: (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
! 2195: (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
! 2196: (range2->type == XML_REGEXP_PUNCT_OTHERS))
! 2197: ret = 1;
! 2198: break;
! 2199: case XML_REGEXP_SEPAR:
! 2200: if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
! 2201: (range2->type == XML_REGEXP_SEPAR_LINE) ||
! 2202: (range2->type == XML_REGEXP_SEPAR_PARA))
! 2203: ret = 1;
! 2204: break;
! 2205: case XML_REGEXP_SYMBOL:
! 2206: if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
! 2207: (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
! 2208: (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
! 2209: (range2->type == XML_REGEXP_SYMBOL_OTHERS))
! 2210: ret = 1;
! 2211: break;
! 2212: case XML_REGEXP_OTHER:
! 2213: if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
! 2214: (range2->type == XML_REGEXP_OTHER_FORMAT) ||
! 2215: (range2->type == XML_REGEXP_OTHER_PRIVATE))
! 2216: ret = 1;
! 2217: break;
! 2218: default:
! 2219: if ((range2->type >= XML_REGEXP_LETTER) &&
! 2220: (range2->type < XML_REGEXP_BLOCK_NAME))
! 2221: ret = 0;
! 2222: else {
! 2223: /* safety net ! */
! 2224: return(1);
! 2225: }
! 2226: }
! 2227: }
! 2228: if (((range1->neg == 0) && (range2->neg != 0)) ||
! 2229: ((range1->neg != 0) && (range2->neg == 0)))
! 2230: ret = !ret;
! 2231: return(ret);
! 2232: }
! 2233:
! 2234: /**
! 2235: * xmlFACompareAtomTypes:
! 2236: * @type1: an atom type
! 2237: * @type2: an atom type
! 2238: *
! 2239: * Compares two atoms type to check whether they intersect in some ways,
! 2240: * this is used by xmlFACompareAtoms only
! 2241: *
! 2242: * Returns 1 if they may intersect and 0 otherwise
! 2243: */
! 2244: static int
! 2245: xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
! 2246: if ((type1 == XML_REGEXP_EPSILON) ||
! 2247: (type1 == XML_REGEXP_CHARVAL) ||
! 2248: (type1 == XML_REGEXP_RANGES) ||
! 2249: (type1 == XML_REGEXP_SUBREG) ||
! 2250: (type1 == XML_REGEXP_STRING) ||
! 2251: (type1 == XML_REGEXP_ANYCHAR))
! 2252: return(1);
! 2253: if ((type2 == XML_REGEXP_EPSILON) ||
! 2254: (type2 == XML_REGEXP_CHARVAL) ||
! 2255: (type2 == XML_REGEXP_RANGES) ||
! 2256: (type2 == XML_REGEXP_SUBREG) ||
! 2257: (type2 == XML_REGEXP_STRING) ||
! 2258: (type2 == XML_REGEXP_ANYCHAR))
! 2259: return(1);
! 2260:
! 2261: if (type1 == type2) return(1);
! 2262:
! 2263: /* simplify subsequent compares by making sure type1 < type2 */
! 2264: if (type1 > type2) {
! 2265: xmlRegAtomType tmp = type1;
! 2266: type1 = type2;
! 2267: type2 = tmp;
! 2268: }
! 2269: switch (type1) {
! 2270: case XML_REGEXP_ANYSPACE: /* \s */
! 2271: /* can't be a letter, number, mark, pontuation, symbol */
! 2272: if ((type2 == XML_REGEXP_NOTSPACE) ||
! 2273: ((type2 >= XML_REGEXP_LETTER) &&
! 2274: (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
! 2275: ((type2 >= XML_REGEXP_NUMBER) &&
! 2276: (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
! 2277: ((type2 >= XML_REGEXP_MARK) &&
! 2278: (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
! 2279: ((type2 >= XML_REGEXP_PUNCT) &&
! 2280: (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
! 2281: ((type2 >= XML_REGEXP_SYMBOL) &&
! 2282: (type2 <= XML_REGEXP_SYMBOL_OTHERS))
! 2283: ) return(0);
! 2284: break;
! 2285: case XML_REGEXP_NOTSPACE: /* \S */
! 2286: break;
! 2287: case XML_REGEXP_INITNAME: /* \l */
! 2288: /* can't be a number, mark, separator, pontuation, symbol or other */
! 2289: if ((type2 == XML_REGEXP_NOTINITNAME) ||
! 2290: ((type2 >= XML_REGEXP_NUMBER) &&
! 2291: (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
! 2292: ((type2 >= XML_REGEXP_MARK) &&
! 2293: (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
! 2294: ((type2 >= XML_REGEXP_SEPAR) &&
! 2295: (type2 <= XML_REGEXP_SEPAR_PARA)) ||
! 2296: ((type2 >= XML_REGEXP_PUNCT) &&
! 2297: (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
! 2298: ((type2 >= XML_REGEXP_SYMBOL) &&
! 2299: (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
! 2300: ((type2 >= XML_REGEXP_OTHER) &&
! 2301: (type2 <= XML_REGEXP_OTHER_NA))
! 2302: ) return(0);
! 2303: break;
! 2304: case XML_REGEXP_NOTINITNAME: /* \L */
! 2305: break;
! 2306: case XML_REGEXP_NAMECHAR: /* \c */
! 2307: /* can't be a mark, separator, pontuation, symbol or other */
! 2308: if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
! 2309: ((type2 >= XML_REGEXP_MARK) &&
! 2310: (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
! 2311: ((type2 >= XML_REGEXP_PUNCT) &&
! 2312: (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
! 2313: ((type2 >= XML_REGEXP_SEPAR) &&
! 2314: (type2 <= XML_REGEXP_SEPAR_PARA)) ||
! 2315: ((type2 >= XML_REGEXP_SYMBOL) &&
! 2316: (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
! 2317: ((type2 >= XML_REGEXP_OTHER) &&
! 2318: (type2 <= XML_REGEXP_OTHER_NA))
! 2319: ) return(0);
! 2320: break;
! 2321: case XML_REGEXP_NOTNAMECHAR: /* \C */
! 2322: break;
! 2323: case XML_REGEXP_DECIMAL: /* \d */
! 2324: /* can't be a letter, mark, separator, pontuation, symbol or other */
! 2325: if ((type2 == XML_REGEXP_NOTDECIMAL) ||
! 2326: (type2 == XML_REGEXP_REALCHAR) ||
! 2327: ((type2 >= XML_REGEXP_LETTER) &&
! 2328: (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
! 2329: ((type2 >= XML_REGEXP_MARK) &&
! 2330: (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
! 2331: ((type2 >= XML_REGEXP_PUNCT) &&
! 2332: (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
! 2333: ((type2 >= XML_REGEXP_SEPAR) &&
! 2334: (type2 <= XML_REGEXP_SEPAR_PARA)) ||
! 2335: ((type2 >= XML_REGEXP_SYMBOL) &&
! 2336: (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
! 2337: ((type2 >= XML_REGEXP_OTHER) &&
! 2338: (type2 <= XML_REGEXP_OTHER_NA))
! 2339: )return(0);
! 2340: break;
! 2341: case XML_REGEXP_NOTDECIMAL: /* \D */
! 2342: break;
! 2343: case XML_REGEXP_REALCHAR: /* \w */
! 2344: /* can't be a mark, separator, pontuation, symbol or other */
! 2345: if ((type2 == XML_REGEXP_NOTDECIMAL) ||
! 2346: ((type2 >= XML_REGEXP_MARK) &&
! 2347: (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
! 2348: ((type2 >= XML_REGEXP_PUNCT) &&
! 2349: (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
! 2350: ((type2 >= XML_REGEXP_SEPAR) &&
! 2351: (type2 <= XML_REGEXP_SEPAR_PARA)) ||
! 2352: ((type2 >= XML_REGEXP_SYMBOL) &&
! 2353: (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
! 2354: ((type2 >= XML_REGEXP_OTHER) &&
! 2355: (type2 <= XML_REGEXP_OTHER_NA))
! 2356: )return(0);
! 2357: break;
! 2358: case XML_REGEXP_NOTREALCHAR: /* \W */
! 2359: break;
! 2360: /*
! 2361: * at that point we know both type 1 and type2 are from
! 2362: * character categories are ordered and are different,
! 2363: * it becomes simple because this is a partition
! 2364: */
! 2365: case XML_REGEXP_LETTER:
! 2366: if (type2 <= XML_REGEXP_LETTER_OTHERS)
! 2367: return(1);
! 2368: return(0);
! 2369: case XML_REGEXP_LETTER_UPPERCASE:
! 2370: case XML_REGEXP_LETTER_LOWERCASE:
! 2371: case XML_REGEXP_LETTER_TITLECASE:
! 2372: case XML_REGEXP_LETTER_MODIFIER:
! 2373: case XML_REGEXP_LETTER_OTHERS:
! 2374: return(0);
! 2375: case XML_REGEXP_MARK:
! 2376: if (type2 <= XML_REGEXP_MARK_ENCLOSING)
! 2377: return(1);
! 2378: return(0);
! 2379: case XML_REGEXP_MARK_NONSPACING:
! 2380: case XML_REGEXP_MARK_SPACECOMBINING:
! 2381: case XML_REGEXP_MARK_ENCLOSING:
! 2382: return(0);
! 2383: case XML_REGEXP_NUMBER:
! 2384: if (type2 <= XML_REGEXP_NUMBER_OTHERS)
! 2385: return(1);
! 2386: return(0);
! 2387: case XML_REGEXP_NUMBER_DECIMAL:
! 2388: case XML_REGEXP_NUMBER_LETTER:
! 2389: case XML_REGEXP_NUMBER_OTHERS:
! 2390: return(0);
! 2391: case XML_REGEXP_PUNCT:
! 2392: if (type2 <= XML_REGEXP_PUNCT_OTHERS)
! 2393: return(1);
! 2394: return(0);
! 2395: case XML_REGEXP_PUNCT_CONNECTOR:
! 2396: case XML_REGEXP_PUNCT_DASH:
! 2397: case XML_REGEXP_PUNCT_OPEN:
! 2398: case XML_REGEXP_PUNCT_CLOSE:
! 2399: case XML_REGEXP_PUNCT_INITQUOTE:
! 2400: case XML_REGEXP_PUNCT_FINQUOTE:
! 2401: case XML_REGEXP_PUNCT_OTHERS:
! 2402: return(0);
! 2403: case XML_REGEXP_SEPAR:
! 2404: if (type2 <= XML_REGEXP_SEPAR_PARA)
! 2405: return(1);
! 2406: return(0);
! 2407: case XML_REGEXP_SEPAR_SPACE:
! 2408: case XML_REGEXP_SEPAR_LINE:
! 2409: case XML_REGEXP_SEPAR_PARA:
! 2410: return(0);
! 2411: case XML_REGEXP_SYMBOL:
! 2412: if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
! 2413: return(1);
! 2414: return(0);
! 2415: case XML_REGEXP_SYMBOL_MATH:
! 2416: case XML_REGEXP_SYMBOL_CURRENCY:
! 2417: case XML_REGEXP_SYMBOL_MODIFIER:
! 2418: case XML_REGEXP_SYMBOL_OTHERS:
! 2419: return(0);
! 2420: case XML_REGEXP_OTHER:
! 2421: if (type2 <= XML_REGEXP_OTHER_NA)
! 2422: return(1);
! 2423: return(0);
! 2424: case XML_REGEXP_OTHER_CONTROL:
! 2425: case XML_REGEXP_OTHER_FORMAT:
! 2426: case XML_REGEXP_OTHER_PRIVATE:
! 2427: case XML_REGEXP_OTHER_NA:
! 2428: return(0);
! 2429: default:
! 2430: break;
! 2431: }
! 2432: return(1);
! 2433: }
! 2434:
! 2435: /**
! 2436: * xmlFAEqualAtoms:
! 2437: * @atom1: an atom
! 2438: * @atom2: an atom
! 2439: * @deep: if not set only compare string pointers
! 2440: *
! 2441: * Compares two atoms to check whether they are the same exactly
! 2442: * this is used to remove equivalent transitions
! 2443: *
! 2444: * Returns 1 if same and 0 otherwise
! 2445: */
! 2446: static int
! 2447: xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
! 2448: int ret = 0;
! 2449:
! 2450: if (atom1 == atom2)
! 2451: return(1);
! 2452: if ((atom1 == NULL) || (atom2 == NULL))
! 2453: return(0);
! 2454:
! 2455: if (atom1->type != atom2->type)
! 2456: return(0);
! 2457: switch (atom1->type) {
! 2458: case XML_REGEXP_EPSILON:
! 2459: ret = 0;
! 2460: break;
! 2461: case XML_REGEXP_STRING:
! 2462: if (!deep)
! 2463: ret = (atom1->valuep == atom2->valuep);
! 2464: else
! 2465: ret = xmlStrEqual((xmlChar *)atom1->valuep,
! 2466: (xmlChar *)atom2->valuep);
! 2467: break;
! 2468: case XML_REGEXP_CHARVAL:
! 2469: ret = (atom1->codepoint == atom2->codepoint);
! 2470: break;
! 2471: case XML_REGEXP_RANGES:
! 2472: /* too hard to do in the general case */
! 2473: ret = 0;
! 2474: default:
! 2475: break;
! 2476: }
! 2477: return(ret);
! 2478: }
! 2479:
! 2480: /**
! 2481: * xmlFACompareAtoms:
! 2482: * @atom1: an atom
! 2483: * @atom2: an atom
! 2484: * @deep: if not set only compare string pointers
! 2485: *
! 2486: * Compares two atoms to check whether they intersect in some ways,
! 2487: * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
! 2488: *
! 2489: * Returns 1 if yes and 0 otherwise
! 2490: */
! 2491: static int
! 2492: xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
! 2493: int ret = 1;
! 2494:
! 2495: if (atom1 == atom2)
! 2496: return(1);
! 2497: if ((atom1 == NULL) || (atom2 == NULL))
! 2498: return(0);
! 2499:
! 2500: if ((atom1->type == XML_REGEXP_ANYCHAR) ||
! 2501: (atom2->type == XML_REGEXP_ANYCHAR))
! 2502: return(1);
! 2503:
! 2504: if (atom1->type > atom2->type) {
! 2505: xmlRegAtomPtr tmp;
! 2506: tmp = atom1;
! 2507: atom1 = atom2;
! 2508: atom2 = tmp;
! 2509: }
! 2510: if (atom1->type != atom2->type) {
! 2511: ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
! 2512: /* if they can't intersect at the type level break now */
! 2513: if (ret == 0)
! 2514: return(0);
! 2515: }
! 2516: switch (atom1->type) {
! 2517: case XML_REGEXP_STRING:
! 2518: if (!deep)
! 2519: ret = (atom1->valuep != atom2->valuep);
! 2520: else
! 2521: ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
! 2522: (xmlChar *)atom2->valuep);
! 2523: break;
! 2524: case XML_REGEXP_EPSILON:
! 2525: goto not_determinist;
! 2526: case XML_REGEXP_CHARVAL:
! 2527: if (atom2->type == XML_REGEXP_CHARVAL) {
! 2528: ret = (atom1->codepoint == atom2->codepoint);
! 2529: } else {
! 2530: ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
! 2531: if (ret < 0)
! 2532: ret = 1;
! 2533: }
! 2534: break;
! 2535: case XML_REGEXP_RANGES:
! 2536: if (atom2->type == XML_REGEXP_RANGES) {
! 2537: int i, j, res;
! 2538: xmlRegRangePtr r1, r2;
! 2539:
! 2540: /*
! 2541: * need to check that none of the ranges eventually matches
! 2542: */
! 2543: for (i = 0;i < atom1->nbRanges;i++) {
! 2544: for (j = 0;j < atom2->nbRanges;j++) {
! 2545: r1 = atom1->ranges[i];
! 2546: r2 = atom2->ranges[j];
! 2547: res = xmlFACompareRanges(r1, r2);
! 2548: if (res == 1) {
! 2549: ret = 1;
! 2550: goto done;
! 2551: }
! 2552: }
! 2553: }
! 2554: ret = 0;
! 2555: }
! 2556: break;
! 2557: default:
! 2558: goto not_determinist;
! 2559: }
! 2560: done:
! 2561: if (atom1->neg != atom2->neg) {
! 2562: ret = !ret;
! 2563: }
! 2564: if (ret == 0)
! 2565: return(0);
! 2566: not_determinist:
! 2567: return(1);
! 2568: }
! 2569:
! 2570: /**
! 2571: * xmlFARecurseDeterminism:
! 2572: * @ctxt: a regexp parser context
! 2573: *
! 2574: * Check whether the associated regexp is determinist,
! 2575: * should be called after xmlFAEliminateEpsilonTransitions()
! 2576: *
! 2577: */
! 2578: static int
! 2579: xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
! 2580: int to, xmlRegAtomPtr atom) {
! 2581: int ret = 1;
! 2582: int res;
! 2583: int transnr, nbTrans;
! 2584: xmlRegTransPtr t1;
! 2585: int deep = 1;
! 2586:
! 2587: if (state == NULL)
! 2588: return(ret);
! 2589:
! 2590: if (ctxt->flags & AM_AUTOMATA_RNG)
! 2591: deep = 0;
! 2592:
! 2593: /*
! 2594: * don't recurse on transitions potentially added in the course of
! 2595: * the elimination.
! 2596: */
! 2597: nbTrans = state->nbTrans;
! 2598: for (transnr = 0;transnr < nbTrans;transnr++) {
! 2599: t1 = &(state->trans[transnr]);
! 2600: /*
! 2601: * check transitions conflicting with the one looked at
! 2602: */
! 2603: if (t1->atom == NULL) {
! 2604: if (t1->to < 0)
! 2605: continue;
! 2606: res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
! 2607: to, atom);
! 2608: if (res == 0) {
! 2609: ret = 0;
! 2610: /* t1->nd = 1; */
! 2611: }
! 2612: continue;
! 2613: }
! 2614: if (t1->to != to)
! 2615: continue;
! 2616: if (xmlFACompareAtoms(t1->atom, atom, deep)) {
! 2617: ret = 0;
! 2618: /* mark the transition as non-deterministic */
! 2619: t1->nd = 1;
! 2620: }
! 2621: }
! 2622: return(ret);
! 2623: }
! 2624:
! 2625: /**
! 2626: * xmlFAComputesDeterminism:
! 2627: * @ctxt: a regexp parser context
! 2628: *
! 2629: * Check whether the associated regexp is determinist,
! 2630: * should be called after xmlFAEliminateEpsilonTransitions()
! 2631: *
! 2632: */
! 2633: static int
! 2634: xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
! 2635: int statenr, transnr;
! 2636: xmlRegStatePtr state;
! 2637: xmlRegTransPtr t1, t2, last;
! 2638: int i;
! 2639: int ret = 1;
! 2640: int deep = 1;
! 2641:
! 2642: #ifdef DEBUG_REGEXP_GRAPH
! 2643: printf("xmlFAComputesDeterminism\n");
! 2644: xmlRegPrintCtxt(stdout, ctxt);
! 2645: #endif
! 2646: if (ctxt->determinist != -1)
! 2647: return(ctxt->determinist);
! 2648:
! 2649: if (ctxt->flags & AM_AUTOMATA_RNG)
! 2650: deep = 0;
! 2651:
! 2652: /*
! 2653: * First cleanup the automata removing cancelled transitions
! 2654: */
! 2655: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 2656: state = ctxt->states[statenr];
! 2657: if (state == NULL)
! 2658: continue;
! 2659: if (state->nbTrans < 2)
! 2660: continue;
! 2661: for (transnr = 0;transnr < state->nbTrans;transnr++) {
! 2662: t1 = &(state->trans[transnr]);
! 2663: /*
! 2664: * Determinism checks in case of counted or all transitions
! 2665: * will have to be handled separately
! 2666: */
! 2667: if (t1->atom == NULL) {
! 2668: /* t1->nd = 1; */
! 2669: continue;
! 2670: }
! 2671: if (t1->to == -1) /* eliminated */
! 2672: continue;
! 2673: for (i = 0;i < transnr;i++) {
! 2674: t2 = &(state->trans[i]);
! 2675: if (t2->to == -1) /* eliminated */
! 2676: continue;
! 2677: if (t2->atom != NULL) {
! 2678: if (t1->to == t2->to) {
! 2679: /*
! 2680: * Here we use deep because we want to keep the
! 2681: * transitions which indicate a conflict
! 2682: */
! 2683: if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
! 2684: (t1->counter == t2->counter) &&
! 2685: (t1->count == t2->count))
! 2686: t2->to = -1; /* eliminated */
! 2687: }
! 2688: }
! 2689: }
! 2690: }
! 2691: }
! 2692:
! 2693: /*
! 2694: * Check for all states that there aren't 2 transitions
! 2695: * with the same atom and a different target.
! 2696: */
! 2697: for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
! 2698: state = ctxt->states[statenr];
! 2699: if (state == NULL)
! 2700: continue;
! 2701: if (state->nbTrans < 2)
! 2702: continue;
! 2703: last = NULL;
! 2704: for (transnr = 0;transnr < state->nbTrans;transnr++) {
! 2705: t1 = &(state->trans[transnr]);
! 2706: /*
! 2707: * Determinism checks in case of counted or all transitions
! 2708: * will have to be handled separately
! 2709: */
! 2710: if (t1->atom == NULL) {
! 2711: continue;
! 2712: }
! 2713: if (t1->to == -1) /* eliminated */
! 2714: continue;
! 2715: for (i = 0;i < transnr;i++) {
! 2716: t2 = &(state->trans[i]);
! 2717: if (t2->to == -1) /* eliminated */
! 2718: continue;
! 2719: if (t2->atom != NULL) {
! 2720: /*
! 2721: * But here we don't use deep because we want to
! 2722: * find transitions which indicate a conflict
! 2723: */
! 2724: if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
! 2725: ret = 0;
! 2726: /* mark the transitions as non-deterministic ones */
! 2727: t1->nd = 1;
! 2728: t2->nd = 1;
! 2729: last = t1;
! 2730: }
! 2731: } else if (t1->to != -1) {
! 2732: /*
! 2733: * do the closure in case of remaining specific
! 2734: * epsilon transitions like choices or all
! 2735: */
! 2736: ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
! 2737: t2->to, t2->atom);
! 2738: /* don't shortcut the computation so all non deterministic
! 2739: transition get marked down
! 2740: if (ret == 0)
! 2741: return(0);
! 2742: */
! 2743: if (ret == 0) {
! 2744: t1->nd = 1;
! 2745: /* t2->nd = 1; */
! 2746: last = t1;
! 2747: }
! 2748: }
! 2749: }
! 2750: /* don't shortcut the computation so all non deterministic
! 2751: transition get marked down
! 2752: if (ret == 0)
! 2753: break; */
! 2754: }
! 2755:
! 2756: /*
! 2757: * mark specifically the last non-deterministic transition
! 2758: * from a state since there is no need to set-up rollback
! 2759: * from it
! 2760: */
! 2761: if (last != NULL) {
! 2762: last->nd = 2;
! 2763: }
! 2764:
! 2765: /* don't shortcut the computation so all non deterministic
! 2766: transition get marked down
! 2767: if (ret == 0)
! 2768: break; */
! 2769: }
! 2770:
! 2771: ctxt->determinist = ret;
! 2772: return(ret);
! 2773: }
! 2774:
! 2775: /************************************************************************
! 2776: * *
! 2777: * Routines to check input against transition atoms *
! 2778: * *
! 2779: ************************************************************************/
! 2780:
! 2781: static int
! 2782: xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
! 2783: int start, int end, const xmlChar *blockName) {
! 2784: int ret = 0;
! 2785:
! 2786: switch (type) {
! 2787: case XML_REGEXP_STRING:
! 2788: case XML_REGEXP_SUBREG:
! 2789: case XML_REGEXP_RANGES:
! 2790: case XML_REGEXP_EPSILON:
! 2791: return(-1);
! 2792: case XML_REGEXP_ANYCHAR:
! 2793: ret = ((codepoint != '\n') && (codepoint != '\r'));
! 2794: break;
! 2795: case XML_REGEXP_CHARVAL:
! 2796: ret = ((codepoint >= start) && (codepoint <= end));
! 2797: break;
! 2798: case XML_REGEXP_NOTSPACE:
! 2799: neg = !neg;
! 2800: case XML_REGEXP_ANYSPACE:
! 2801: ret = ((codepoint == '\n') || (codepoint == '\r') ||
! 2802: (codepoint == '\t') || (codepoint == ' '));
! 2803: break;
! 2804: case XML_REGEXP_NOTINITNAME:
! 2805: neg = !neg;
! 2806: case XML_REGEXP_INITNAME:
! 2807: ret = (IS_LETTER(codepoint) ||
! 2808: (codepoint == '_') || (codepoint == ':'));
! 2809: break;
! 2810: case XML_REGEXP_NOTNAMECHAR:
! 2811: neg = !neg;
! 2812: case XML_REGEXP_NAMECHAR:
! 2813: ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
! 2814: (codepoint == '.') || (codepoint == '-') ||
! 2815: (codepoint == '_') || (codepoint == ':') ||
! 2816: IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
! 2817: break;
! 2818: case XML_REGEXP_NOTDECIMAL:
! 2819: neg = !neg;
! 2820: case XML_REGEXP_DECIMAL:
! 2821: ret = xmlUCSIsCatNd(codepoint);
! 2822: break;
! 2823: case XML_REGEXP_REALCHAR:
! 2824: neg = !neg;
! 2825: case XML_REGEXP_NOTREALCHAR:
! 2826: ret = xmlUCSIsCatP(codepoint);
! 2827: if (ret == 0)
! 2828: ret = xmlUCSIsCatZ(codepoint);
! 2829: if (ret == 0)
! 2830: ret = xmlUCSIsCatC(codepoint);
! 2831: break;
! 2832: case XML_REGEXP_LETTER:
! 2833: ret = xmlUCSIsCatL(codepoint);
! 2834: break;
! 2835: case XML_REGEXP_LETTER_UPPERCASE:
! 2836: ret = xmlUCSIsCatLu(codepoint);
! 2837: break;
! 2838: case XML_REGEXP_LETTER_LOWERCASE:
! 2839: ret = xmlUCSIsCatLl(codepoint);
! 2840: break;
! 2841: case XML_REGEXP_LETTER_TITLECASE:
! 2842: ret = xmlUCSIsCatLt(codepoint);
! 2843: break;
! 2844: case XML_REGEXP_LETTER_MODIFIER:
! 2845: ret = xmlUCSIsCatLm(codepoint);
! 2846: break;
! 2847: case XML_REGEXP_LETTER_OTHERS:
! 2848: ret = xmlUCSIsCatLo(codepoint);
! 2849: break;
! 2850: case XML_REGEXP_MARK:
! 2851: ret = xmlUCSIsCatM(codepoint);
! 2852: break;
! 2853: case XML_REGEXP_MARK_NONSPACING:
! 2854: ret = xmlUCSIsCatMn(codepoint);
! 2855: break;
! 2856: case XML_REGEXP_MARK_SPACECOMBINING:
! 2857: ret = xmlUCSIsCatMc(codepoint);
! 2858: break;
! 2859: case XML_REGEXP_MARK_ENCLOSING:
! 2860: ret = xmlUCSIsCatMe(codepoint);
! 2861: break;
! 2862: case XML_REGEXP_NUMBER:
! 2863: ret = xmlUCSIsCatN(codepoint);
! 2864: break;
! 2865: case XML_REGEXP_NUMBER_DECIMAL:
! 2866: ret = xmlUCSIsCatNd(codepoint);
! 2867: break;
! 2868: case XML_REGEXP_NUMBER_LETTER:
! 2869: ret = xmlUCSIsCatNl(codepoint);
! 2870: break;
! 2871: case XML_REGEXP_NUMBER_OTHERS:
! 2872: ret = xmlUCSIsCatNo(codepoint);
! 2873: break;
! 2874: case XML_REGEXP_PUNCT:
! 2875: ret = xmlUCSIsCatP(codepoint);
! 2876: break;
! 2877: case XML_REGEXP_PUNCT_CONNECTOR:
! 2878: ret = xmlUCSIsCatPc(codepoint);
! 2879: break;
! 2880: case XML_REGEXP_PUNCT_DASH:
! 2881: ret = xmlUCSIsCatPd(codepoint);
! 2882: break;
! 2883: case XML_REGEXP_PUNCT_OPEN:
! 2884: ret = xmlUCSIsCatPs(codepoint);
! 2885: break;
! 2886: case XML_REGEXP_PUNCT_CLOSE:
! 2887: ret = xmlUCSIsCatPe(codepoint);
! 2888: break;
! 2889: case XML_REGEXP_PUNCT_INITQUOTE:
! 2890: ret = xmlUCSIsCatPi(codepoint);
! 2891: break;
! 2892: case XML_REGEXP_PUNCT_FINQUOTE:
! 2893: ret = xmlUCSIsCatPf(codepoint);
! 2894: break;
! 2895: case XML_REGEXP_PUNCT_OTHERS:
! 2896: ret = xmlUCSIsCatPo(codepoint);
! 2897: break;
! 2898: case XML_REGEXP_SEPAR:
! 2899: ret = xmlUCSIsCatZ(codepoint);
! 2900: break;
! 2901: case XML_REGEXP_SEPAR_SPACE:
! 2902: ret = xmlUCSIsCatZs(codepoint);
! 2903: break;
! 2904: case XML_REGEXP_SEPAR_LINE:
! 2905: ret = xmlUCSIsCatZl(codepoint);
! 2906: break;
! 2907: case XML_REGEXP_SEPAR_PARA:
! 2908: ret = xmlUCSIsCatZp(codepoint);
! 2909: break;
! 2910: case XML_REGEXP_SYMBOL:
! 2911: ret = xmlUCSIsCatS(codepoint);
! 2912: break;
! 2913: case XML_REGEXP_SYMBOL_MATH:
! 2914: ret = xmlUCSIsCatSm(codepoint);
! 2915: break;
! 2916: case XML_REGEXP_SYMBOL_CURRENCY:
! 2917: ret = xmlUCSIsCatSc(codepoint);
! 2918: break;
! 2919: case XML_REGEXP_SYMBOL_MODIFIER:
! 2920: ret = xmlUCSIsCatSk(codepoint);
! 2921: break;
! 2922: case XML_REGEXP_SYMBOL_OTHERS:
! 2923: ret = xmlUCSIsCatSo(codepoint);
! 2924: break;
! 2925: case XML_REGEXP_OTHER:
! 2926: ret = xmlUCSIsCatC(codepoint);
! 2927: break;
! 2928: case XML_REGEXP_OTHER_CONTROL:
! 2929: ret = xmlUCSIsCatCc(codepoint);
! 2930: break;
! 2931: case XML_REGEXP_OTHER_FORMAT:
! 2932: ret = xmlUCSIsCatCf(codepoint);
! 2933: break;
! 2934: case XML_REGEXP_OTHER_PRIVATE:
! 2935: ret = xmlUCSIsCatCo(codepoint);
! 2936: break;
! 2937: case XML_REGEXP_OTHER_NA:
! 2938: /* ret = xmlUCSIsCatCn(codepoint); */
! 2939: /* Seems it doesn't exist anymore in recent Unicode releases */
! 2940: ret = 0;
! 2941: break;
! 2942: case XML_REGEXP_BLOCK_NAME:
! 2943: ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
! 2944: break;
! 2945: }
! 2946: if (neg)
! 2947: return(!ret);
! 2948: return(ret);
! 2949: }
! 2950:
! 2951: static int
! 2952: xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
! 2953: int i, ret = 0;
! 2954: xmlRegRangePtr range;
! 2955:
! 2956: if ((atom == NULL) || (!IS_CHAR(codepoint)))
! 2957: return(-1);
! 2958:
! 2959: switch (atom->type) {
! 2960: case XML_REGEXP_SUBREG:
! 2961: case XML_REGEXP_EPSILON:
! 2962: return(-1);
! 2963: case XML_REGEXP_CHARVAL:
! 2964: return(codepoint == atom->codepoint);
! 2965: case XML_REGEXP_RANGES: {
! 2966: int accept = 0;
! 2967:
! 2968: for (i = 0;i < atom->nbRanges;i++) {
! 2969: range = atom->ranges[i];
! 2970: if (range->neg == 2) {
! 2971: ret = xmlRegCheckCharacterRange(range->type, codepoint,
! 2972: 0, range->start, range->end,
! 2973: range->blockName);
! 2974: if (ret != 0)
! 2975: return(0); /* excluded char */
! 2976: } else if (range->neg) {
! 2977: ret = xmlRegCheckCharacterRange(range->type, codepoint,
! 2978: 0, range->start, range->end,
! 2979: range->blockName);
! 2980: if (ret == 0)
! 2981: accept = 1;
! 2982: else
! 2983: return(0);
! 2984: } else {
! 2985: ret = xmlRegCheckCharacterRange(range->type, codepoint,
! 2986: 0, range->start, range->end,
! 2987: range->blockName);
! 2988: if (ret != 0)
! 2989: accept = 1; /* might still be excluded */
! 2990: }
! 2991: }
! 2992: return(accept);
! 2993: }
! 2994: case XML_REGEXP_STRING:
! 2995: printf("TODO: XML_REGEXP_STRING\n");
! 2996: return(-1);
! 2997: case XML_REGEXP_ANYCHAR:
! 2998: case XML_REGEXP_ANYSPACE:
! 2999: case XML_REGEXP_NOTSPACE:
! 3000: case XML_REGEXP_INITNAME:
! 3001: case XML_REGEXP_NOTINITNAME:
! 3002: case XML_REGEXP_NAMECHAR:
! 3003: case XML_REGEXP_NOTNAMECHAR:
! 3004: case XML_REGEXP_DECIMAL:
! 3005: case XML_REGEXP_NOTDECIMAL:
! 3006: case XML_REGEXP_REALCHAR:
! 3007: case XML_REGEXP_NOTREALCHAR:
! 3008: case XML_REGEXP_LETTER:
! 3009: case XML_REGEXP_LETTER_UPPERCASE:
! 3010: case XML_REGEXP_LETTER_LOWERCASE:
! 3011: case XML_REGEXP_LETTER_TITLECASE:
! 3012: case XML_REGEXP_LETTER_MODIFIER:
! 3013: case XML_REGEXP_LETTER_OTHERS:
! 3014: case XML_REGEXP_MARK:
! 3015: case XML_REGEXP_MARK_NONSPACING:
! 3016: case XML_REGEXP_MARK_SPACECOMBINING:
! 3017: case XML_REGEXP_MARK_ENCLOSING:
! 3018: case XML_REGEXP_NUMBER:
! 3019: case XML_REGEXP_NUMBER_DECIMAL:
! 3020: case XML_REGEXP_NUMBER_LETTER:
! 3021: case XML_REGEXP_NUMBER_OTHERS:
! 3022: case XML_REGEXP_PUNCT:
! 3023: case XML_REGEXP_PUNCT_CONNECTOR:
! 3024: case XML_REGEXP_PUNCT_DASH:
! 3025: case XML_REGEXP_PUNCT_OPEN:
! 3026: case XML_REGEXP_PUNCT_CLOSE:
! 3027: case XML_REGEXP_PUNCT_INITQUOTE:
! 3028: case XML_REGEXP_PUNCT_FINQUOTE:
! 3029: case XML_REGEXP_PUNCT_OTHERS:
! 3030: case XML_REGEXP_SEPAR:
! 3031: case XML_REGEXP_SEPAR_SPACE:
! 3032: case XML_REGEXP_SEPAR_LINE:
! 3033: case XML_REGEXP_SEPAR_PARA:
! 3034: case XML_REGEXP_SYMBOL:
! 3035: case XML_REGEXP_SYMBOL_MATH:
! 3036: case XML_REGEXP_SYMBOL_CURRENCY:
! 3037: case XML_REGEXP_SYMBOL_MODIFIER:
! 3038: case XML_REGEXP_SYMBOL_OTHERS:
! 3039: case XML_REGEXP_OTHER:
! 3040: case XML_REGEXP_OTHER_CONTROL:
! 3041: case XML_REGEXP_OTHER_FORMAT:
! 3042: case XML_REGEXP_OTHER_PRIVATE:
! 3043: case XML_REGEXP_OTHER_NA:
! 3044: case XML_REGEXP_BLOCK_NAME:
! 3045: ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
! 3046: (const xmlChar *)atom->valuep);
! 3047: if (atom->neg)
! 3048: ret = !ret;
! 3049: break;
! 3050: }
! 3051: return(ret);
! 3052: }
! 3053:
! 3054: /************************************************************************
! 3055: * *
! 3056: * Saving and restoring state of an execution context *
! 3057: * *
! 3058: ************************************************************************/
! 3059:
! 3060: #ifdef DEBUG_REGEXP_EXEC
! 3061: static void
! 3062: xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
! 3063: printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
! 3064: if (exec->inputStack != NULL) {
! 3065: int i;
! 3066: printf(": ");
! 3067: for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
! 3068: printf("%s ", (const char *)
! 3069: exec->inputStack[exec->inputStackNr - (i + 1)].value);
! 3070: } else {
! 3071: printf(": %s", &(exec->inputString[exec->index]));
! 3072: }
! 3073: printf("\n");
! 3074: }
! 3075: #endif
! 3076:
! 3077: static void
! 3078: xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
! 3079: #ifdef DEBUG_REGEXP_EXEC
! 3080: printf("saving ");
! 3081: exec->transno++;
! 3082: xmlFARegDebugExec(exec);
! 3083: exec->transno--;
! 3084: #endif
! 3085: #ifdef MAX_PUSH
! 3086: if (exec->nbPush > MAX_PUSH) {
! 3087: return;
! 3088: }
! 3089: exec->nbPush++;
! 3090: #endif
! 3091:
! 3092: if (exec->maxRollbacks == 0) {
! 3093: exec->maxRollbacks = 4;
! 3094: exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
! 3095: sizeof(xmlRegExecRollback));
! 3096: if (exec->rollbacks == NULL) {
! 3097: xmlRegexpErrMemory(NULL, "saving regexp");
! 3098: exec->maxRollbacks = 0;
! 3099: return;
! 3100: }
! 3101: memset(exec->rollbacks, 0,
! 3102: exec->maxRollbacks * sizeof(xmlRegExecRollback));
! 3103: } else if (exec->nbRollbacks >= exec->maxRollbacks) {
! 3104: xmlRegExecRollback *tmp;
! 3105: int len = exec->maxRollbacks;
! 3106:
! 3107: exec->maxRollbacks *= 2;
! 3108: tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
! 3109: exec->maxRollbacks * sizeof(xmlRegExecRollback));
! 3110: if (tmp == NULL) {
! 3111: xmlRegexpErrMemory(NULL, "saving regexp");
! 3112: exec->maxRollbacks /= 2;
! 3113: return;
! 3114: }
! 3115: exec->rollbacks = tmp;
! 3116: tmp = &exec->rollbacks[len];
! 3117: memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
! 3118: }
! 3119: exec->rollbacks[exec->nbRollbacks].state = exec->state;
! 3120: exec->rollbacks[exec->nbRollbacks].index = exec->index;
! 3121: exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
! 3122: if (exec->comp->nbCounters > 0) {
! 3123: if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
! 3124: exec->rollbacks[exec->nbRollbacks].counts = (int *)
! 3125: xmlMalloc(exec->comp->nbCounters * sizeof(int));
! 3126: if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
! 3127: xmlRegexpErrMemory(NULL, "saving regexp");
! 3128: exec->status = -5;
! 3129: return;
! 3130: }
! 3131: }
! 3132: memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
! 3133: exec->comp->nbCounters * sizeof(int));
! 3134: }
! 3135: exec->nbRollbacks++;
! 3136: }
! 3137:
! 3138: static void
! 3139: xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
! 3140: if (exec->nbRollbacks <= 0) {
! 3141: exec->status = -1;
! 3142: #ifdef DEBUG_REGEXP_EXEC
! 3143: printf("rollback failed on empty stack\n");
! 3144: #endif
! 3145: return;
! 3146: }
! 3147: exec->nbRollbacks--;
! 3148: exec->state = exec->rollbacks[exec->nbRollbacks].state;
! 3149: exec->index = exec->rollbacks[exec->nbRollbacks].index;
! 3150: exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
! 3151: if (exec->comp->nbCounters > 0) {
! 3152: if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
! 3153: fprintf(stderr, "exec save: allocation failed");
! 3154: exec->status = -6;
! 3155: return;
! 3156: }
! 3157: memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
! 3158: exec->comp->nbCounters * sizeof(int));
! 3159: }
! 3160:
! 3161: #ifdef DEBUG_REGEXP_EXEC
! 3162: printf("restored ");
! 3163: xmlFARegDebugExec(exec);
! 3164: #endif
! 3165: }
! 3166:
! 3167: /************************************************************************
! 3168: * *
! 3169: * Verifier, running an input against a compiled regexp *
! 3170: * *
! 3171: ************************************************************************/
! 3172:
! 3173: static int
! 3174: xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
! 3175: xmlRegExecCtxt execval;
! 3176: xmlRegExecCtxtPtr exec = &execval;
! 3177: int ret, codepoint = 0, len, deter;
! 3178:
! 3179: exec->inputString = content;
! 3180: exec->index = 0;
! 3181: exec->nbPush = 0;
! 3182: exec->determinist = 1;
! 3183: exec->maxRollbacks = 0;
! 3184: exec->nbRollbacks = 0;
! 3185: exec->rollbacks = NULL;
! 3186: exec->status = 0;
! 3187: exec->comp = comp;
! 3188: exec->state = comp->states[0];
! 3189: exec->transno = 0;
! 3190: exec->transcount = 0;
! 3191: exec->inputStack = NULL;
! 3192: exec->inputStackMax = 0;
! 3193: if (comp->nbCounters > 0) {
! 3194: exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
! 3195: if (exec->counts == NULL) {
! 3196: xmlRegexpErrMemory(NULL, "running regexp");
! 3197: return(-1);
! 3198: }
! 3199: memset(exec->counts, 0, comp->nbCounters * sizeof(int));
! 3200: } else
! 3201: exec->counts = NULL;
! 3202: while ((exec->status == 0) &&
! 3203: ((exec->inputString[exec->index] != 0) ||
! 3204: ((exec->state != NULL) &&
! 3205: (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
! 3206: xmlRegTransPtr trans;
! 3207: xmlRegAtomPtr atom;
! 3208:
! 3209: /*
! 3210: * If end of input on non-terminal state, rollback, however we may
! 3211: * still have epsilon like transition for counted transitions
! 3212: * on counters, in that case don't break too early. Additionally,
! 3213: * if we are working on a range like "AB{0,2}", where B is not present,
! 3214: * we don't want to break.
! 3215: */
! 3216: len = 1;
! 3217: if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
! 3218: /*
! 3219: * if there is a transition, we must check if
! 3220: * atom allows minOccurs of 0
! 3221: */
! 3222: if (exec->transno < exec->state->nbTrans) {
! 3223: trans = &exec->state->trans[exec->transno];
! 3224: if (trans->to >=0) {
! 3225: atom = trans->atom;
! 3226: if (!((atom->min == 0) && (atom->max > 0)))
! 3227: goto rollback;
! 3228: }
! 3229: } else
! 3230: goto rollback;
! 3231: }
! 3232:
! 3233: exec->transcount = 0;
! 3234: for (;exec->transno < exec->state->nbTrans;exec->transno++) {
! 3235: trans = &exec->state->trans[exec->transno];
! 3236: if (trans->to < 0)
! 3237: continue;
! 3238: atom = trans->atom;
! 3239: ret = 0;
! 3240: deter = 1;
! 3241: if (trans->count >= 0) {
! 3242: int count;
! 3243: xmlRegCounterPtr counter;
! 3244:
! 3245: if (exec->counts == NULL) {
! 3246: exec->status = -1;
! 3247: goto error;
! 3248: }
! 3249: /*
! 3250: * A counted transition.
! 3251: */
! 3252:
! 3253: count = exec->counts[trans->count];
! 3254: counter = &exec->comp->counters[trans->count];
! 3255: #ifdef DEBUG_REGEXP_EXEC
! 3256: printf("testing count %d: val %d, min %d, max %d\n",
! 3257: trans->count, count, counter->min, counter->max);
! 3258: #endif
! 3259: ret = ((count >= counter->min) && (count <= counter->max));
! 3260: if ((ret) && (counter->min != counter->max))
! 3261: deter = 0;
! 3262: } else if (atom == NULL) {
! 3263: fprintf(stderr, "epsilon transition left at runtime\n");
! 3264: exec->status = -2;
! 3265: break;
! 3266: } else if (exec->inputString[exec->index] != 0) {
! 3267: codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
! 3268: ret = xmlRegCheckCharacter(atom, codepoint);
! 3269: if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
! 3270: xmlRegStatePtr to = comp->states[trans->to];
! 3271:
! 3272: /*
! 3273: * this is a multiple input sequence
! 3274: * If there is a counter associated increment it now.
! 3275: * before potentially saving and rollback
! 3276: * do not increment if the counter is already over the
! 3277: * maximum limit in which case get to next transition
! 3278: */
! 3279: if (trans->counter >= 0) {
! 3280: xmlRegCounterPtr counter;
! 3281:
! 3282: if ((exec->counts == NULL) ||
! 3283: (exec->comp == NULL) ||
! 3284: (exec->comp->counters == NULL)) {
! 3285: exec->status = -1;
! 3286: goto error;
! 3287: }
! 3288: counter = &exec->comp->counters[trans->counter];
! 3289: if (exec->counts[trans->counter] >= counter->max)
! 3290: continue; /* for loop on transitions */
! 3291:
! 3292: #ifdef DEBUG_REGEXP_EXEC
! 3293: printf("Increasing count %d\n", trans->counter);
! 3294: #endif
! 3295: exec->counts[trans->counter]++;
! 3296: }
! 3297: if (exec->state->nbTrans > exec->transno + 1) {
! 3298: xmlFARegExecSave(exec);
! 3299: }
! 3300: exec->transcount = 1;
! 3301: do {
! 3302: /*
! 3303: * Try to progress as much as possible on the input
! 3304: */
! 3305: if (exec->transcount == atom->max) {
! 3306: break;
! 3307: }
! 3308: exec->index += len;
! 3309: /*
! 3310: * End of input: stop here
! 3311: */
! 3312: if (exec->inputString[exec->index] == 0) {
! 3313: exec->index -= len;
! 3314: break;
! 3315: }
! 3316: if (exec->transcount >= atom->min) {
! 3317: int transno = exec->transno;
! 3318: xmlRegStatePtr state = exec->state;
! 3319:
! 3320: /*
! 3321: * The transition is acceptable save it
! 3322: */
! 3323: exec->transno = -1; /* trick */
! 3324: exec->state = to;
! 3325: xmlFARegExecSave(exec);
! 3326: exec->transno = transno;
! 3327: exec->state = state;
! 3328: }
! 3329: codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
! 3330: len);
! 3331: ret = xmlRegCheckCharacter(atom, codepoint);
! 3332: exec->transcount++;
! 3333: } while (ret == 1);
! 3334: if (exec->transcount < atom->min)
! 3335: ret = 0;
! 3336:
! 3337: /*
! 3338: * If the last check failed but one transition was found
! 3339: * possible, rollback
! 3340: */
! 3341: if (ret < 0)
! 3342: ret = 0;
! 3343: if (ret == 0) {
! 3344: goto rollback;
! 3345: }
! 3346: if (trans->counter >= 0) {
! 3347: if (exec->counts == NULL) {
! 3348: exec->status = -1;
! 3349: goto error;
! 3350: }
! 3351: #ifdef DEBUG_REGEXP_EXEC
! 3352: printf("Decreasing count %d\n", trans->counter);
! 3353: #endif
! 3354: exec->counts[trans->counter]--;
! 3355: }
! 3356: } else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
! 3357: /*
! 3358: * we don't match on the codepoint, but minOccurs of 0
! 3359: * says that's ok. Setting len to 0 inhibits stepping
! 3360: * over the codepoint.
! 3361: */
! 3362: exec->transcount = 1;
! 3363: len = 0;
! 3364: ret = 1;
! 3365: }
! 3366: } else if ((atom->min == 0) && (atom->max > 0)) {
! 3367: /* another spot to match when minOccurs is 0 */
! 3368: exec->transcount = 1;
! 3369: len = 0;
! 3370: ret = 1;
! 3371: }
! 3372: if (ret == 1) {
! 3373: if ((trans->nd == 1) ||
! 3374: ((trans->count >= 0) && (deter == 0) &&
! 3375: (exec->state->nbTrans > exec->transno + 1))) {
! 3376: #ifdef DEBUG_REGEXP_EXEC
! 3377: if (trans->nd == 1)
! 3378: printf("Saving on nd transition atom %d for %c at %d\n",
! 3379: trans->atom->no, codepoint, exec->index);
! 3380: else
! 3381: printf("Saving on counted transition count %d for %c at %d\n",
! 3382: trans->count, codepoint, exec->index);
! 3383: #endif
! 3384: xmlFARegExecSave(exec);
! 3385: }
! 3386: if (trans->counter >= 0) {
! 3387: xmlRegCounterPtr counter;
! 3388:
! 3389: /* make sure we don't go over the counter maximum value */
! 3390: if ((exec->counts == NULL) ||
! 3391: (exec->comp == NULL) ||
! 3392: (exec->comp->counters == NULL)) {
! 3393: exec->status = -1;
! 3394: goto error;
! 3395: }
! 3396: counter = &exec->comp->counters[trans->counter];
! 3397: if (exec->counts[trans->counter] >= counter->max)
! 3398: continue; /* for loop on transitions */
! 3399: #ifdef DEBUG_REGEXP_EXEC
! 3400: printf("Increasing count %d\n", trans->counter);
! 3401: #endif
! 3402: exec->counts[trans->counter]++;
! 3403: }
! 3404: if ((trans->count >= 0) &&
! 3405: (trans->count < REGEXP_ALL_COUNTER)) {
! 3406: if (exec->counts == NULL) {
! 3407: exec->status = -1;
! 3408: goto error;
! 3409: }
! 3410: #ifdef DEBUG_REGEXP_EXEC
! 3411: printf("resetting count %d on transition\n",
! 3412: trans->count);
! 3413: #endif
! 3414: exec->counts[trans->count] = 0;
! 3415: }
! 3416: #ifdef DEBUG_REGEXP_EXEC
! 3417: printf("entering state %d\n", trans->to);
! 3418: #endif
! 3419: exec->state = comp->states[trans->to];
! 3420: exec->transno = 0;
! 3421: if (trans->atom != NULL) {
! 3422: exec->index += len;
! 3423: }
! 3424: goto progress;
! 3425: } else if (ret < 0) {
! 3426: exec->status = -4;
! 3427: break;
! 3428: }
! 3429: }
! 3430: if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
! 3431: rollback:
! 3432: /*
! 3433: * Failed to find a way out
! 3434: */
! 3435: exec->determinist = 0;
! 3436: #ifdef DEBUG_REGEXP_EXEC
! 3437: printf("rollback from state %d on %d:%c\n", exec->state->no,
! 3438: codepoint,codepoint);
! 3439: #endif
! 3440: xmlFARegExecRollBack(exec);
! 3441: }
! 3442: progress:
! 3443: continue;
! 3444: }
! 3445: error:
! 3446: if (exec->rollbacks != NULL) {
! 3447: if (exec->counts != NULL) {
! 3448: int i;
! 3449:
! 3450: for (i = 0;i < exec->maxRollbacks;i++)
! 3451: if (exec->rollbacks[i].counts != NULL)
! 3452: xmlFree(exec->rollbacks[i].counts);
! 3453: }
! 3454: xmlFree(exec->rollbacks);
! 3455: }
! 3456: if (exec->counts != NULL)
! 3457: xmlFree(exec->counts);
! 3458: if (exec->status == 0)
! 3459: return(1);
! 3460: if (exec->status == -1) {
! 3461: if (exec->nbPush > MAX_PUSH)
! 3462: return(-1);
! 3463: return(0);
! 3464: }
! 3465: return(exec->status);
! 3466: }
! 3467:
! 3468: /************************************************************************
! 3469: * *
! 3470: * Progressive interface to the verifier one atom at a time *
! 3471: * *
! 3472: ************************************************************************/
! 3473: #ifdef DEBUG_ERR
! 3474: static void testerr(xmlRegExecCtxtPtr exec);
! 3475: #endif
! 3476:
! 3477: /**
! 3478: * xmlRegNewExecCtxt:
! 3479: * @comp: a precompiled regular expression
! 3480: * @callback: a callback function used for handling progresses in the
! 3481: * automata matching phase
! 3482: * @data: the context data associated to the callback in this context
! 3483: *
! 3484: * Build a context used for progressive evaluation of a regexp.
! 3485: *
! 3486: * Returns the new context
! 3487: */
! 3488: xmlRegExecCtxtPtr
! 3489: xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
! 3490: xmlRegExecCtxtPtr exec;
! 3491:
! 3492: if (comp == NULL)
! 3493: return(NULL);
! 3494: if ((comp->compact == NULL) && (comp->states == NULL))
! 3495: return(NULL);
! 3496: exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
! 3497: if (exec == NULL) {
! 3498: xmlRegexpErrMemory(NULL, "creating execution context");
! 3499: return(NULL);
! 3500: }
! 3501: memset(exec, 0, sizeof(xmlRegExecCtxt));
! 3502: exec->inputString = NULL;
! 3503: exec->index = 0;
! 3504: exec->determinist = 1;
! 3505: exec->maxRollbacks = 0;
! 3506: exec->nbRollbacks = 0;
! 3507: exec->rollbacks = NULL;
! 3508: exec->status = 0;
! 3509: exec->comp = comp;
! 3510: if (comp->compact == NULL)
! 3511: exec->state = comp->states[0];
! 3512: exec->transno = 0;
! 3513: exec->transcount = 0;
! 3514: exec->callback = callback;
! 3515: exec->data = data;
! 3516: if (comp->nbCounters > 0) {
! 3517: /*
! 3518: * For error handling, exec->counts is allocated twice the size
! 3519: * the second half is used to store the data in case of rollback
! 3520: */
! 3521: exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
! 3522: * 2);
! 3523: if (exec->counts == NULL) {
! 3524: xmlRegexpErrMemory(NULL, "creating execution context");
! 3525: xmlFree(exec);
! 3526: return(NULL);
! 3527: }
! 3528: memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
! 3529: exec->errCounts = &exec->counts[comp->nbCounters];
! 3530: } else {
! 3531: exec->counts = NULL;
! 3532: exec->errCounts = NULL;
! 3533: }
! 3534: exec->inputStackMax = 0;
! 3535: exec->inputStackNr = 0;
! 3536: exec->inputStack = NULL;
! 3537: exec->errStateNo = -1;
! 3538: exec->errString = NULL;
! 3539: exec->nbPush = 0;
! 3540: return(exec);
! 3541: }
! 3542:
! 3543: /**
! 3544: * xmlRegFreeExecCtxt:
! 3545: * @exec: a regular expression evaulation context
! 3546: *
! 3547: * Free the structures associated to a regular expression evaulation context.
! 3548: */
! 3549: void
! 3550: xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
! 3551: if (exec == NULL)
! 3552: return;
! 3553:
! 3554: if (exec->rollbacks != NULL) {
! 3555: if (exec->counts != NULL) {
! 3556: int i;
! 3557:
! 3558: for (i = 0;i < exec->maxRollbacks;i++)
! 3559: if (exec->rollbacks[i].counts != NULL)
! 3560: xmlFree(exec->rollbacks[i].counts);
! 3561: }
! 3562: xmlFree(exec->rollbacks);
! 3563: }
! 3564: if (exec->counts != NULL)
! 3565: xmlFree(exec->counts);
! 3566: if (exec->inputStack != NULL) {
! 3567: int i;
! 3568:
! 3569: for (i = 0;i < exec->inputStackNr;i++) {
! 3570: if (exec->inputStack[i].value != NULL)
! 3571: xmlFree(exec->inputStack[i].value);
! 3572: }
! 3573: xmlFree(exec->inputStack);
! 3574: }
! 3575: if (exec->errString != NULL)
! 3576: xmlFree(exec->errString);
! 3577: xmlFree(exec);
! 3578: }
! 3579:
! 3580: static void
! 3581: xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
! 3582: void *data) {
! 3583: #ifdef DEBUG_PUSH
! 3584: printf("saving value: %d:%s\n", exec->inputStackNr, value);
! 3585: #endif
! 3586: if (exec->inputStackMax == 0) {
! 3587: exec->inputStackMax = 4;
! 3588: exec->inputStack = (xmlRegInputTokenPtr)
! 3589: xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
! 3590: if (exec->inputStack == NULL) {
! 3591: xmlRegexpErrMemory(NULL, "pushing input string");
! 3592: exec->inputStackMax = 0;
! 3593: return;
! 3594: }
! 3595: } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
! 3596: xmlRegInputTokenPtr tmp;
! 3597:
! 3598: exec->inputStackMax *= 2;
! 3599: tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
! 3600: exec->inputStackMax * sizeof(xmlRegInputToken));
! 3601: if (tmp == NULL) {
! 3602: xmlRegexpErrMemory(NULL, "pushing input string");
! 3603: exec->inputStackMax /= 2;
! 3604: return;
! 3605: }
! 3606: exec->inputStack = tmp;
! 3607: }
! 3608: exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
! 3609: exec->inputStack[exec->inputStackNr].data = data;
! 3610: exec->inputStackNr++;
! 3611: exec->inputStack[exec->inputStackNr].value = NULL;
! 3612: exec->inputStack[exec->inputStackNr].data = NULL;
! 3613: }
! 3614:
! 3615: /**
! 3616: * xmlRegStrEqualWildcard:
! 3617: * @expStr: the string to be evaluated
! 3618: * @valStr: the validation string
! 3619: *
! 3620: * Checks if both strings are equal or have the same content. "*"
! 3621: * can be used as a wildcard in @valStr; "|" is used as a seperator of
! 3622: * substrings in both @expStr and @valStr.
! 3623: *
! 3624: * Returns 1 if the comparison is satisfied and the number of substrings
! 3625: * is equal, 0 otherwise.
! 3626: */
! 3627:
! 3628: static int
! 3629: xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
! 3630: if (expStr == valStr) return(1);
! 3631: if (expStr == NULL) return(0);
! 3632: if (valStr == NULL) return(0);
! 3633: do {
! 3634: /*
! 3635: * Eval if we have a wildcard for the current item.
! 3636: */
! 3637: if (*expStr != *valStr) {
! 3638: /* if one of them starts with a wildcard make valStr be it */
! 3639: if (*valStr == '*') {
! 3640: const xmlChar *tmp;
! 3641:
! 3642: tmp = valStr;
! 3643: valStr = expStr;
! 3644: expStr = tmp;
! 3645: }
! 3646: if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
! 3647: do {
! 3648: if (*valStr == XML_REG_STRING_SEPARATOR)
! 3649: break;
! 3650: valStr++;
! 3651: } while (*valStr != 0);
! 3652: continue;
! 3653: } else
! 3654: return(0);
! 3655: }
! 3656: expStr++;
! 3657: valStr++;
! 3658: } while (*valStr != 0);
! 3659: if (*expStr != 0)
! 3660: return (0);
! 3661: else
! 3662: return (1);
! 3663: }
! 3664:
! 3665: /**
! 3666: * xmlRegCompactPushString:
! 3667: * @exec: a regexp execution context
! 3668: * @comp: the precompiled exec with a compact table
! 3669: * @value: a string token input
! 3670: * @data: data associated to the token to reuse in callbacks
! 3671: *
! 3672: * Push one input token in the execution context
! 3673: *
! 3674: * Returns: 1 if the regexp reached a final state, 0 if non-final, and
! 3675: * a negative value in case of error.
! 3676: */
! 3677: static int
! 3678: xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
! 3679: xmlRegexpPtr comp,
! 3680: const xmlChar *value,
! 3681: void *data) {
! 3682: int state = exec->index;
! 3683: int i, target;
! 3684:
! 3685: if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
! 3686: return(-1);
! 3687:
! 3688: if (value == NULL) {
! 3689: /*
! 3690: * are we at a final state ?
! 3691: */
! 3692: if (comp->compact[state * (comp->nbstrings + 1)] ==
! 3693: XML_REGEXP_FINAL_STATE)
! 3694: return(1);
! 3695: return(0);
! 3696: }
! 3697:
! 3698: #ifdef DEBUG_PUSH
! 3699: printf("value pushed: %s\n", value);
! 3700: #endif
! 3701:
! 3702: /*
! 3703: * Examine all outside transitions from current state
! 3704: */
! 3705: for (i = 0;i < comp->nbstrings;i++) {
! 3706: target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
! 3707: if ((target > 0) && (target <= comp->nbstates)) {
! 3708: target--; /* to avoid 0 */
! 3709: if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
! 3710: exec->index = target;
! 3711: if ((exec->callback != NULL) && (comp->transdata != NULL)) {
! 3712: exec->callback(exec->data, value,
! 3713: comp->transdata[state * comp->nbstrings + i], data);
! 3714: }
! 3715: #ifdef DEBUG_PUSH
! 3716: printf("entering state %d\n", target);
! 3717: #endif
! 3718: if (comp->compact[target * (comp->nbstrings + 1)] ==
! 3719: XML_REGEXP_SINK_STATE)
! 3720: goto error;
! 3721:
! 3722: if (comp->compact[target * (comp->nbstrings + 1)] ==
! 3723: XML_REGEXP_FINAL_STATE)
! 3724: return(1);
! 3725: return(0);
! 3726: }
! 3727: }
! 3728: }
! 3729: /*
! 3730: * Failed to find an exit transition out from current state for the
! 3731: * current token
! 3732: */
! 3733: #ifdef DEBUG_PUSH
! 3734: printf("failed to find a transition for %s on state %d\n", value, state);
! 3735: #endif
! 3736: error:
! 3737: if (exec->errString != NULL)
! 3738: xmlFree(exec->errString);
! 3739: exec->errString = xmlStrdup(value);
! 3740: exec->errStateNo = state;
! 3741: exec->status = -1;
! 3742: #ifdef DEBUG_ERR
! 3743: testerr(exec);
! 3744: #endif
! 3745: return(-1);
! 3746: }
! 3747:
! 3748: /**
! 3749: * xmlRegExecPushStringInternal:
! 3750: * @exec: a regexp execution context or NULL to indicate the end
! 3751: * @value: a string token input
! 3752: * @data: data associated to the token to reuse in callbacks
! 3753: * @compound: value was assembled from 2 strings
! 3754: *
! 3755: * Push one input token in the execution context
! 3756: *
! 3757: * Returns: 1 if the regexp reached a final state, 0 if non-final, and
! 3758: * a negative value in case of error.
! 3759: */
! 3760: static int
! 3761: xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
! 3762: void *data, int compound) {
! 3763: xmlRegTransPtr trans;
! 3764: xmlRegAtomPtr atom;
! 3765: int ret;
! 3766: int final = 0;
! 3767: int progress = 1;
! 3768:
! 3769: if (exec == NULL)
! 3770: return(-1);
! 3771: if (exec->comp == NULL)
! 3772: return(-1);
! 3773: if (exec->status != 0)
! 3774: return(exec->status);
! 3775:
! 3776: if (exec->comp->compact != NULL)
! 3777: return(xmlRegCompactPushString(exec, exec->comp, value, data));
! 3778:
! 3779: if (value == NULL) {
! 3780: if (exec->state->type == XML_REGEXP_FINAL_STATE)
! 3781: return(1);
! 3782: final = 1;
! 3783: }
! 3784:
! 3785: #ifdef DEBUG_PUSH
! 3786: printf("value pushed: %s\n", value);
! 3787: #endif
! 3788: /*
! 3789: * If we have an active rollback stack push the new value there
! 3790: * and get back to where we were left
! 3791: */
! 3792: if ((value != NULL) && (exec->inputStackNr > 0)) {
! 3793: xmlFARegExecSaveInputString(exec, value, data);
! 3794: value = exec->inputStack[exec->index].value;
! 3795: data = exec->inputStack[exec->index].data;
! 3796: #ifdef DEBUG_PUSH
! 3797: printf("value loaded: %s\n", value);
! 3798: #endif
! 3799: }
! 3800:
! 3801: while ((exec->status == 0) &&
! 3802: ((value != NULL) ||
! 3803: ((final == 1) &&
! 3804: (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
! 3805:
! 3806: /*
! 3807: * End of input on non-terminal state, rollback, however we may
! 3808: * still have epsilon like transition for counted transitions
! 3809: * on counters, in that case don't break too early.
! 3810: */
! 3811: if ((value == NULL) && (exec->counts == NULL))
! 3812: goto rollback;
! 3813:
! 3814: exec->transcount = 0;
! 3815: for (;exec->transno < exec->state->nbTrans;exec->transno++) {
! 3816: trans = &exec->state->trans[exec->transno];
! 3817: if (trans->to < 0)
! 3818: continue;
! 3819: atom = trans->atom;
! 3820: ret = 0;
! 3821: if (trans->count == REGEXP_ALL_LAX_COUNTER) {
! 3822: int i;
! 3823: int count;
! 3824: xmlRegTransPtr t;
! 3825: xmlRegCounterPtr counter;
! 3826:
! 3827: ret = 0;
! 3828:
! 3829: #ifdef DEBUG_PUSH
! 3830: printf("testing all lax %d\n", trans->count);
! 3831: #endif
! 3832: /*
! 3833: * Check all counted transitions from the current state
! 3834: */
! 3835: if ((value == NULL) && (final)) {
! 3836: ret = 1;
! 3837: } else if (value != NULL) {
! 3838: for (i = 0;i < exec->state->nbTrans;i++) {
! 3839: t = &exec->state->trans[i];
! 3840: if ((t->counter < 0) || (t == trans))
! 3841: continue;
! 3842: counter = &exec->comp->counters[t->counter];
! 3843: count = exec->counts[t->counter];
! 3844: if ((count < counter->max) &&
! 3845: (t->atom != NULL) &&
! 3846: (xmlStrEqual(value, t->atom->valuep))) {
! 3847: ret = 0;
! 3848: break;
! 3849: }
! 3850: if ((count >= counter->min) &&
! 3851: (count < counter->max) &&
! 3852: (t->atom != NULL) &&
! 3853: (xmlStrEqual(value, t->atom->valuep))) {
! 3854: ret = 1;
! 3855: break;
! 3856: }
! 3857: }
! 3858: }
! 3859: } else if (trans->count == REGEXP_ALL_COUNTER) {
! 3860: int i;
! 3861: int count;
! 3862: xmlRegTransPtr t;
! 3863: xmlRegCounterPtr counter;
! 3864:
! 3865: ret = 1;
! 3866:
! 3867: #ifdef DEBUG_PUSH
! 3868: printf("testing all %d\n", trans->count);
! 3869: #endif
! 3870: /*
! 3871: * Check all counted transitions from the current state
! 3872: */
! 3873: for (i = 0;i < exec->state->nbTrans;i++) {
! 3874: t = &exec->state->trans[i];
! 3875: if ((t->counter < 0) || (t == trans))
! 3876: continue;
! 3877: counter = &exec->comp->counters[t->counter];
! 3878: count = exec->counts[t->counter];
! 3879: if ((count < counter->min) || (count > counter->max)) {
! 3880: ret = 0;
! 3881: break;
! 3882: }
! 3883: }
! 3884: } else if (trans->count >= 0) {
! 3885: int count;
! 3886: xmlRegCounterPtr counter;
! 3887:
! 3888: /*
! 3889: * A counted transition.
! 3890: */
! 3891:
! 3892: count = exec->counts[trans->count];
! 3893: counter = &exec->comp->counters[trans->count];
! 3894: #ifdef DEBUG_PUSH
! 3895: printf("testing count %d: val %d, min %d, max %d\n",
! 3896: trans->count, count, counter->min, counter->max);
! 3897: #endif
! 3898: ret = ((count >= counter->min) && (count <= counter->max));
! 3899: } else if (atom == NULL) {
! 3900: fprintf(stderr, "epsilon transition left at runtime\n");
! 3901: exec->status = -2;
! 3902: break;
! 3903: } else if (value != NULL) {
! 3904: ret = xmlRegStrEqualWildcard(atom->valuep, value);
! 3905: if (atom->neg) {
! 3906: ret = !ret;
! 3907: if (!compound)
! 3908: ret = 0;
! 3909: }
! 3910: if ((ret == 1) && (trans->counter >= 0)) {
! 3911: xmlRegCounterPtr counter;
! 3912: int count;
! 3913:
! 3914: count = exec->counts[trans->counter];
! 3915: counter = &exec->comp->counters[trans->counter];
! 3916: if (count >= counter->max)
! 3917: ret = 0;
! 3918: }
! 3919:
! 3920: if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
! 3921: xmlRegStatePtr to = exec->comp->states[trans->to];
! 3922:
! 3923: /*
! 3924: * this is a multiple input sequence
! 3925: */
! 3926: if (exec->state->nbTrans > exec->transno + 1) {
! 3927: if (exec->inputStackNr <= 0) {
! 3928: xmlFARegExecSaveInputString(exec, value, data);
! 3929: }
! 3930: xmlFARegExecSave(exec);
! 3931: }
! 3932: exec->transcount = 1;
! 3933: do {
! 3934: /*
! 3935: * Try to progress as much as possible on the input
! 3936: */
! 3937: if (exec->transcount == atom->max) {
! 3938: break;
! 3939: }
! 3940: exec->index++;
! 3941: value = exec->inputStack[exec->index].value;
! 3942: data = exec->inputStack[exec->index].data;
! 3943: #ifdef DEBUG_PUSH
! 3944: printf("value loaded: %s\n", value);
! 3945: #endif
! 3946:
! 3947: /*
! 3948: * End of input: stop here
! 3949: */
! 3950: if (value == NULL) {
! 3951: exec->index --;
! 3952: break;
! 3953: }
! 3954: if (exec->transcount >= atom->min) {
! 3955: int transno = exec->transno;
! 3956: xmlRegStatePtr state = exec->state;
! 3957:
! 3958: /*
! 3959: * The transition is acceptable save it
! 3960: */
! 3961: exec->transno = -1; /* trick */
! 3962: exec->state = to;
! 3963: if (exec->inputStackNr <= 0) {
! 3964: xmlFARegExecSaveInputString(exec, value, data);
! 3965: }
! 3966: xmlFARegExecSave(exec);
! 3967: exec->transno = transno;
! 3968: exec->state = state;
! 3969: }
! 3970: ret = xmlStrEqual(value, atom->valuep);
! 3971: exec->transcount++;
! 3972: } while (ret == 1);
! 3973: if (exec->transcount < atom->min)
! 3974: ret = 0;
! 3975:
! 3976: /*
! 3977: * If the last check failed but one transition was found
! 3978: * possible, rollback
! 3979: */
! 3980: if (ret < 0)
! 3981: ret = 0;
! 3982: if (ret == 0) {
! 3983: goto rollback;
! 3984: }
! 3985: }
! 3986: }
! 3987: if (ret == 1) {
! 3988: if ((exec->callback != NULL) && (atom != NULL) &&
! 3989: (data != NULL)) {
! 3990: exec->callback(exec->data, atom->valuep,
! 3991: atom->data, data);
! 3992: }
! 3993: if (exec->state->nbTrans > exec->transno + 1) {
! 3994: if (exec->inputStackNr <= 0) {
! 3995: xmlFARegExecSaveInputString(exec, value, data);
! 3996: }
! 3997: xmlFARegExecSave(exec);
! 3998: }
! 3999: if (trans->counter >= 0) {
! 4000: #ifdef DEBUG_PUSH
! 4001: printf("Increasing count %d\n", trans->counter);
! 4002: #endif
! 4003: exec->counts[trans->counter]++;
! 4004: }
! 4005: if ((trans->count >= 0) &&
! 4006: (trans->count < REGEXP_ALL_COUNTER)) {
! 4007: #ifdef DEBUG_REGEXP_EXEC
! 4008: printf("resetting count %d on transition\n",
! 4009: trans->count);
! 4010: #endif
! 4011: exec->counts[trans->count] = 0;
! 4012: }
! 4013: #ifdef DEBUG_PUSH
! 4014: printf("entering state %d\n", trans->to);
! 4015: #endif
! 4016: if ((exec->comp->states[trans->to] != NULL) &&
! 4017: (exec->comp->states[trans->to]->type ==
! 4018: XML_REGEXP_SINK_STATE)) {
! 4019: /*
! 4020: * entering a sink state, save the current state as error
! 4021: * state.
! 4022: */
! 4023: if (exec->errString != NULL)
! 4024: xmlFree(exec->errString);
! 4025: exec->errString = xmlStrdup(value);
! 4026: exec->errState = exec->state;
! 4027: memcpy(exec->errCounts, exec->counts,
! 4028: exec->comp->nbCounters * sizeof(int));
! 4029: }
! 4030: exec->state = exec->comp->states[trans->to];
! 4031: exec->transno = 0;
! 4032: if (trans->atom != NULL) {
! 4033: if (exec->inputStack != NULL) {
! 4034: exec->index++;
! 4035: if (exec->index < exec->inputStackNr) {
! 4036: value = exec->inputStack[exec->index].value;
! 4037: data = exec->inputStack[exec->index].data;
! 4038: #ifdef DEBUG_PUSH
! 4039: printf("value loaded: %s\n", value);
! 4040: #endif
! 4041: } else {
! 4042: value = NULL;
! 4043: data = NULL;
! 4044: #ifdef DEBUG_PUSH
! 4045: printf("end of input\n");
! 4046: #endif
! 4047: }
! 4048: } else {
! 4049: value = NULL;
! 4050: data = NULL;
! 4051: #ifdef DEBUG_PUSH
! 4052: printf("end of input\n");
! 4053: #endif
! 4054: }
! 4055: }
! 4056: goto progress;
! 4057: } else if (ret < 0) {
! 4058: exec->status = -4;
! 4059: break;
! 4060: }
! 4061: }
! 4062: if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
! 4063: rollback:
! 4064: /*
! 4065: * if we didn't yet rollback on the current input
! 4066: * store the current state as the error state.
! 4067: */
! 4068: if ((progress) && (exec->state != NULL) &&
! 4069: (exec->state->type != XML_REGEXP_SINK_STATE)) {
! 4070: progress = 0;
! 4071: if (exec->errString != NULL)
! 4072: xmlFree(exec->errString);
! 4073: exec->errString = xmlStrdup(value);
! 4074: exec->errState = exec->state;
! 4075: memcpy(exec->errCounts, exec->counts,
! 4076: exec->comp->nbCounters * sizeof(int));
! 4077: }
! 4078:
! 4079: /*
! 4080: * Failed to find a way out
! 4081: */
! 4082: exec->determinist = 0;
! 4083: xmlFARegExecRollBack(exec);
! 4084: if (exec->status == 0) {
! 4085: value = exec->inputStack[exec->index].value;
! 4086: data = exec->inputStack[exec->index].data;
! 4087: #ifdef DEBUG_PUSH
! 4088: printf("value loaded: %s\n", value);
! 4089: #endif
! 4090: }
! 4091: }
! 4092: continue;
! 4093: progress:
! 4094: progress = 1;
! 4095: continue;
! 4096: }
! 4097: if (exec->status == 0) {
! 4098: return(exec->state->type == XML_REGEXP_FINAL_STATE);
! 4099: }
! 4100: #ifdef DEBUG_ERR
! 4101: if (exec->status < 0) {
! 4102: testerr(exec);
! 4103: }
! 4104: #endif
! 4105: return(exec->status);
! 4106: }
! 4107:
! 4108: /**
! 4109: * xmlRegExecPushString:
! 4110: * @exec: a regexp execution context or NULL to indicate the end
! 4111: * @value: a string token input
! 4112: * @data: data associated to the token to reuse in callbacks
! 4113: *
! 4114: * Push one input token in the execution context
! 4115: *
! 4116: * Returns: 1 if the regexp reached a final state, 0 if non-final, and
! 4117: * a negative value in case of error.
! 4118: */
! 4119: int
! 4120: xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
! 4121: void *data) {
! 4122: return(xmlRegExecPushStringInternal(exec, value, data, 0));
! 4123: }
! 4124:
! 4125: /**
! 4126: * xmlRegExecPushString2:
! 4127: * @exec: a regexp execution context or NULL to indicate the end
! 4128: * @value: the first string token input
! 4129: * @value2: the second string token input
! 4130: * @data: data associated to the token to reuse in callbacks
! 4131: *
! 4132: * Push one input token in the execution context
! 4133: *
! 4134: * Returns: 1 if the regexp reached a final state, 0 if non-final, and
! 4135: * a negative value in case of error.
! 4136: */
! 4137: int
! 4138: xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
! 4139: const xmlChar *value2, void *data) {
! 4140: xmlChar buf[150];
! 4141: int lenn, lenp, ret;
! 4142: xmlChar *str;
! 4143:
! 4144: if (exec == NULL)
! 4145: return(-1);
! 4146: if (exec->comp == NULL)
! 4147: return(-1);
! 4148: if (exec->status != 0)
! 4149: return(exec->status);
! 4150:
! 4151: if (value2 == NULL)
! 4152: return(xmlRegExecPushString(exec, value, data));
! 4153:
! 4154: lenn = strlen((char *) value2);
! 4155: lenp = strlen((char *) value);
! 4156:
! 4157: if (150 < lenn + lenp + 2) {
! 4158: str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
! 4159: if (str == NULL) {
! 4160: exec->status = -1;
! 4161: return(-1);
! 4162: }
! 4163: } else {
! 4164: str = buf;
! 4165: }
! 4166: memcpy(&str[0], value, lenp);
! 4167: str[lenp] = XML_REG_STRING_SEPARATOR;
! 4168: memcpy(&str[lenp + 1], value2, lenn);
! 4169: str[lenn + lenp + 1] = 0;
! 4170:
! 4171: if (exec->comp->compact != NULL)
! 4172: ret = xmlRegCompactPushString(exec, exec->comp, str, data);
! 4173: else
! 4174: ret = xmlRegExecPushStringInternal(exec, str, data, 1);
! 4175:
! 4176: if (str != buf)
! 4177: xmlFree(str);
! 4178: return(ret);
! 4179: }
! 4180:
! 4181: /**
! 4182: * xmlRegExecGetValues:
! 4183: * @exec: a regexp execution context
! 4184: * @err: error extraction or normal one
! 4185: * @nbval: pointer to the number of accepted values IN/OUT
! 4186: * @nbneg: return number of negative transitions
! 4187: * @values: pointer to the array of acceptable values
! 4188: * @terminal: return value if this was a terminal state
! 4189: *
! 4190: * Extract informations from the regexp execution, internal routine to
! 4191: * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
! 4192: *
! 4193: * Returns: 0 in case of success or -1 in case of error.
! 4194: */
! 4195: static int
! 4196: xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
! 4197: int *nbval, int *nbneg,
! 4198: xmlChar **values, int *terminal) {
! 4199: int maxval;
! 4200: int nb = 0;
! 4201:
! 4202: if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
! 4203: (values == NULL) || (*nbval <= 0))
! 4204: return(-1);
! 4205:
! 4206: maxval = *nbval;
! 4207: *nbval = 0;
! 4208: *nbneg = 0;
! 4209: if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
! 4210: xmlRegexpPtr comp;
! 4211: int target, i, state;
! 4212:
! 4213: comp = exec->comp;
! 4214:
! 4215: if (err) {
! 4216: if (exec->errStateNo == -1) return(-1);
! 4217: state = exec->errStateNo;
! 4218: } else {
! 4219: state = exec->index;
! 4220: }
! 4221: if (terminal != NULL) {
! 4222: if (comp->compact[state * (comp->nbstrings + 1)] ==
! 4223: XML_REGEXP_FINAL_STATE)
! 4224: *terminal = 1;
! 4225: else
! 4226: *terminal = 0;
! 4227: }
! 4228: for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
! 4229: target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
! 4230: if ((target > 0) && (target <= comp->nbstates) &&
! 4231: (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
! 4232: XML_REGEXP_SINK_STATE)) {
! 4233: values[nb++] = comp->stringMap[i];
! 4234: (*nbval)++;
! 4235: }
! 4236: }
! 4237: for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
! 4238: target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
! 4239: if ((target > 0) && (target <= comp->nbstates) &&
! 4240: (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
! 4241: XML_REGEXP_SINK_STATE)) {
! 4242: values[nb++] = comp->stringMap[i];
! 4243: (*nbneg)++;
! 4244: }
! 4245: }
! 4246: } else {
! 4247: int transno;
! 4248: xmlRegTransPtr trans;
! 4249: xmlRegAtomPtr atom;
! 4250: xmlRegStatePtr state;
! 4251:
! 4252: if (terminal != NULL) {
! 4253: if (exec->state->type == XML_REGEXP_FINAL_STATE)
! 4254: *terminal = 1;
! 4255: else
! 4256: *terminal = 0;
! 4257: }
! 4258:
! 4259: if (err) {
! 4260: if (exec->errState == NULL) return(-1);
! 4261: state = exec->errState;
! 4262: } else {
! 4263: if (exec->state == NULL) return(-1);
! 4264: state = exec->state;
! 4265: }
! 4266: for (transno = 0;
! 4267: (transno < state->nbTrans) && (nb < maxval);
! 4268: transno++) {
! 4269: trans = &state->trans[transno];
! 4270: if (trans->to < 0)
! 4271: continue;
! 4272: atom = trans->atom;
! 4273: if ((atom == NULL) || (atom->valuep == NULL))
! 4274: continue;
! 4275: if (trans->count == REGEXP_ALL_LAX_COUNTER) {
! 4276: /* this should not be reached but ... */
! 4277: TODO;
! 4278: } else if (trans->count == REGEXP_ALL_COUNTER) {
! 4279: /* this should not be reached but ... */
! 4280: TODO;
! 4281: } else if (trans->counter >= 0) {
! 4282: xmlRegCounterPtr counter = NULL;
! 4283: int count;
! 4284:
! 4285: if (err)
! 4286: count = exec->errCounts[trans->counter];
! 4287: else
! 4288: count = exec->counts[trans->counter];
! 4289: if (exec->comp != NULL)
! 4290: counter = &exec->comp->counters[trans->counter];
! 4291: if ((counter == NULL) || (count < counter->max)) {
! 4292: if (atom->neg)
! 4293: values[nb++] = (xmlChar *) atom->valuep2;
! 4294: else
! 4295: values[nb++] = (xmlChar *) atom->valuep;
! 4296: (*nbval)++;
! 4297: }
! 4298: } else {
! 4299: if ((exec->comp->states[trans->to] != NULL) &&
! 4300: (exec->comp->states[trans->to]->type !=
! 4301: XML_REGEXP_SINK_STATE)) {
! 4302: if (atom->neg)
! 4303: values[nb++] = (xmlChar *) atom->valuep2;
! 4304: else
! 4305: values[nb++] = (xmlChar *) atom->valuep;
! 4306: (*nbval)++;
! 4307: }
! 4308: }
! 4309: }
! 4310: for (transno = 0;
! 4311: (transno < state->nbTrans) && (nb < maxval);
! 4312: transno++) {
! 4313: trans = &state->trans[transno];
! 4314: if (trans->to < 0)
! 4315: continue;
! 4316: atom = trans->atom;
! 4317: if ((atom == NULL) || (atom->valuep == NULL))
! 4318: continue;
! 4319: if (trans->count == REGEXP_ALL_LAX_COUNTER) {
! 4320: continue;
! 4321: } else if (trans->count == REGEXP_ALL_COUNTER) {
! 4322: continue;
! 4323: } else if (trans->counter >= 0) {
! 4324: continue;
! 4325: } else {
! 4326: if ((exec->comp->states[trans->to] != NULL) &&
! 4327: (exec->comp->states[trans->to]->type ==
! 4328: XML_REGEXP_SINK_STATE)) {
! 4329: if (atom->neg)
! 4330: values[nb++] = (xmlChar *) atom->valuep2;
! 4331: else
! 4332: values[nb++] = (xmlChar *) atom->valuep;
! 4333: (*nbneg)++;
! 4334: }
! 4335: }
! 4336: }
! 4337: }
! 4338: return(0);
! 4339: }
! 4340:
! 4341: /**
! 4342: * xmlRegExecNextValues:
! 4343: * @exec: a regexp execution context
! 4344: * @nbval: pointer to the number of accepted values IN/OUT
! 4345: * @nbneg: return number of negative transitions
! 4346: * @values: pointer to the array of acceptable values
! 4347: * @terminal: return value if this was a terminal state
! 4348: *
! 4349: * Extract informations from the regexp execution,
! 4350: * the parameter @values must point to an array of @nbval string pointers
! 4351: * on return nbval will contain the number of possible strings in that
! 4352: * state and the @values array will be updated with them. The string values
! 4353: * returned will be freed with the @exec context and don't need to be
! 4354: * deallocated.
! 4355: *
! 4356: * Returns: 0 in case of success or -1 in case of error.
! 4357: */
! 4358: int
! 4359: xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
! 4360: xmlChar **values, int *terminal) {
! 4361: return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
! 4362: }
! 4363:
! 4364: /**
! 4365: * xmlRegExecErrInfo:
! 4366: * @exec: a regexp execution context generating an error
! 4367: * @string: return value for the error string
! 4368: * @nbval: pointer to the number of accepted values IN/OUT
! 4369: * @nbneg: return number of negative transitions
! 4370: * @values: pointer to the array of acceptable values
! 4371: * @terminal: return value if this was a terminal state
! 4372: *
! 4373: * Extract error informations from the regexp execution, the parameter
! 4374: * @string will be updated with the value pushed and not accepted,
! 4375: * the parameter @values must point to an array of @nbval string pointers
! 4376: * on return nbval will contain the number of possible strings in that
! 4377: * state and the @values array will be updated with them. The string values
! 4378: * returned will be freed with the @exec context and don't need to be
! 4379: * deallocated.
! 4380: *
! 4381: * Returns: 0 in case of success or -1 in case of error.
! 4382: */
! 4383: int
! 4384: xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
! 4385: int *nbval, int *nbneg, xmlChar **values, int *terminal) {
! 4386: if (exec == NULL)
! 4387: return(-1);
! 4388: if (string != NULL) {
! 4389: if (exec->status != 0)
! 4390: *string = exec->errString;
! 4391: else
! 4392: *string = NULL;
! 4393: }
! 4394: return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
! 4395: }
! 4396:
! 4397: #ifdef DEBUG_ERR
! 4398: static void testerr(xmlRegExecCtxtPtr exec) {
! 4399: const xmlChar *string;
! 4400: xmlChar *values[5];
! 4401: int nb = 5;
! 4402: int nbneg;
! 4403: int terminal;
! 4404: xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
! 4405: }
! 4406: #endif
! 4407:
! 4408: #if 0
! 4409: static int
! 4410: xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
! 4411: xmlRegTransPtr trans;
! 4412: xmlRegAtomPtr atom;
! 4413: int ret;
! 4414: int codepoint, len;
! 4415:
! 4416: if (exec == NULL)
! 4417: return(-1);
! 4418: if (exec->status != 0)
! 4419: return(exec->status);
! 4420:
! 4421: while ((exec->status == 0) &&
! 4422: ((exec->inputString[exec->index] != 0) ||
! 4423: (exec->state->type != XML_REGEXP_FINAL_STATE))) {
! 4424:
! 4425: /*
! 4426: * End of input on non-terminal state, rollback, however we may
! 4427: * still have epsilon like transition for counted transitions
! 4428: * on counters, in that case don't break too early.
! 4429: */
! 4430: if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
! 4431: goto rollback;
! 4432:
! 4433: exec->transcount = 0;
! 4434: for (;exec->transno < exec->state->nbTrans;exec->transno++) {
! 4435: trans = &exec->state->trans[exec->transno];
! 4436: if (trans->to < 0)
! 4437: continue;
! 4438: atom = trans->atom;
! 4439: ret = 0;
! 4440: if (trans->count >= 0) {
! 4441: int count;
! 4442: xmlRegCounterPtr counter;
! 4443:
! 4444: /*
! 4445: * A counted transition.
! 4446: */
! 4447:
! 4448: count = exec->counts[trans->count];
! 4449: counter = &exec->comp->counters[trans->count];
! 4450: #ifdef DEBUG_REGEXP_EXEC
! 4451: printf("testing count %d: val %d, min %d, max %d\n",
! 4452: trans->count, count, counter->min, counter->max);
! 4453: #endif
! 4454: ret = ((count >= counter->min) && (count <= counter->max));
! 4455: } else if (atom == NULL) {
! 4456: fprintf(stderr, "epsilon transition left at runtime\n");
! 4457: exec->status = -2;
! 4458: break;
! 4459: } else if (exec->inputString[exec->index] != 0) {
! 4460: codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
! 4461: ret = xmlRegCheckCharacter(atom, codepoint);
! 4462: if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
! 4463: xmlRegStatePtr to = exec->comp->states[trans->to];
! 4464:
! 4465: /*
! 4466: * this is a multiple input sequence
! 4467: */
! 4468: if (exec->state->nbTrans > exec->transno + 1) {
! 4469: xmlFARegExecSave(exec);
! 4470: }
! 4471: exec->transcount = 1;
! 4472: do {
! 4473: /*
! 4474: * Try to progress as much as possible on the input
! 4475: */
! 4476: if (exec->transcount == atom->max) {
! 4477: break;
! 4478: }
! 4479: exec->index += len;
! 4480: /*
! 4481: * End of input: stop here
! 4482: */
! 4483: if (exec->inputString[exec->index] == 0) {
! 4484: exec->index -= len;
! 4485: break;
! 4486: }
! 4487: if (exec->transcount >= atom->min) {
! 4488: int transno = exec->transno;
! 4489: xmlRegStatePtr state = exec->state;
! 4490:
! 4491: /*
! 4492: * The transition is acceptable save it
! 4493: */
! 4494: exec->transno = -1; /* trick */
! 4495: exec->state = to;
! 4496: xmlFARegExecSave(exec);
! 4497: exec->transno = transno;
! 4498: exec->state = state;
! 4499: }
! 4500: codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
! 4501: len);
! 4502: ret = xmlRegCheckCharacter(atom, codepoint);
! 4503: exec->transcount++;
! 4504: } while (ret == 1);
! 4505: if (exec->transcount < atom->min)
! 4506: ret = 0;
! 4507:
! 4508: /*
! 4509: * If the last check failed but one transition was found
! 4510: * possible, rollback
! 4511: */
! 4512: if (ret < 0)
! 4513: ret = 0;
! 4514: if (ret == 0) {
! 4515: goto rollback;
! 4516: }
! 4517: }
! 4518: }
! 4519: if (ret == 1) {
! 4520: if (exec->state->nbTrans > exec->transno + 1) {
! 4521: xmlFARegExecSave(exec);
! 4522: }
! 4523: /*
! 4524: * restart count for expressions like this ((abc){2})*
! 4525: */
! 4526: if (trans->count >= 0) {
! 4527: #ifdef DEBUG_REGEXP_EXEC
! 4528: printf("Reset count %d\n", trans->count);
! 4529: #endif
! 4530: exec->counts[trans->count] = 0;
! 4531: }
! 4532: if (trans->counter >= 0) {
! 4533: #ifdef DEBUG_REGEXP_EXEC
! 4534: printf("Increasing count %d\n", trans->counter);
! 4535: #endif
! 4536: exec->counts[trans->counter]++;
! 4537: }
! 4538: #ifdef DEBUG_REGEXP_EXEC
! 4539: printf("entering state %d\n", trans->to);
! 4540: #endif
! 4541: exec->state = exec->comp->states[trans->to];
! 4542: exec->transno = 0;
! 4543: if (trans->atom != NULL) {
! 4544: exec->index += len;
! 4545: }
! 4546: goto progress;
! 4547: } else if (ret < 0) {
! 4548: exec->status = -4;
! 4549: break;
! 4550: }
! 4551: }
! 4552: if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
! 4553: rollback:
! 4554: /*
! 4555: * Failed to find a way out
! 4556: */
! 4557: exec->determinist = 0;
! 4558: xmlFARegExecRollBack(exec);
! 4559: }
! 4560: progress:
! 4561: continue;
! 4562: }
! 4563: }
! 4564: #endif
! 4565: /************************************************************************
! 4566: * *
! 4567: * Parser for the Schemas Datatype Regular Expressions *
! 4568: * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
! 4569: * *
! 4570: ************************************************************************/
! 4571:
! 4572: /**
! 4573: * xmlFAIsChar:
! 4574: * @ctxt: a regexp parser context
! 4575: *
! 4576: * [10] Char ::= [^.\?*+()|#x5B#x5D]
! 4577: */
! 4578: static int
! 4579: xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
! 4580: int cur;
! 4581: int len;
! 4582:
! 4583: cur = CUR_SCHAR(ctxt->cur, len);
! 4584: if ((cur == '.') || (cur == '\\') || (cur == '?') ||
! 4585: (cur == '*') || (cur == '+') || (cur == '(') ||
! 4586: (cur == ')') || (cur == '|') || (cur == 0x5B) ||
! 4587: (cur == 0x5D) || (cur == 0))
! 4588: return(-1);
! 4589: return(cur);
! 4590: }
! 4591:
! 4592: /**
! 4593: * xmlFAParseCharProp:
! 4594: * @ctxt: a regexp parser context
! 4595: *
! 4596: * [27] charProp ::= IsCategory | IsBlock
! 4597: * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
! 4598: * Separators | Symbols | Others
! 4599: * [29] Letters ::= 'L' [ultmo]?
! 4600: * [30] Marks ::= 'M' [nce]?
! 4601: * [31] Numbers ::= 'N' [dlo]?
! 4602: * [32] Punctuation ::= 'P' [cdseifo]?
! 4603: * [33] Separators ::= 'Z' [slp]?
! 4604: * [34] Symbols ::= 'S' [mcko]?
! 4605: * [35] Others ::= 'C' [cfon]?
! 4606: * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
! 4607: */
! 4608: static void
! 4609: xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
! 4610: int cur;
! 4611: xmlRegAtomType type = (xmlRegAtomType) 0;
! 4612: xmlChar *blockName = NULL;
! 4613:
! 4614: cur = CUR;
! 4615: if (cur == 'L') {
! 4616: NEXT;
! 4617: cur = CUR;
! 4618: if (cur == 'u') {
! 4619: NEXT;
! 4620: type = XML_REGEXP_LETTER_UPPERCASE;
! 4621: } else if (cur == 'l') {
! 4622: NEXT;
! 4623: type = XML_REGEXP_LETTER_LOWERCASE;
! 4624: } else if (cur == 't') {
! 4625: NEXT;
! 4626: type = XML_REGEXP_LETTER_TITLECASE;
! 4627: } else if (cur == 'm') {
! 4628: NEXT;
! 4629: type = XML_REGEXP_LETTER_MODIFIER;
! 4630: } else if (cur == 'o') {
! 4631: NEXT;
! 4632: type = XML_REGEXP_LETTER_OTHERS;
! 4633: } else {
! 4634: type = XML_REGEXP_LETTER;
! 4635: }
! 4636: } else if (cur == 'M') {
! 4637: NEXT;
! 4638: cur = CUR;
! 4639: if (cur == 'n') {
! 4640: NEXT;
! 4641: /* nonspacing */
! 4642: type = XML_REGEXP_MARK_NONSPACING;
! 4643: } else if (cur == 'c') {
! 4644: NEXT;
! 4645: /* spacing combining */
! 4646: type = XML_REGEXP_MARK_SPACECOMBINING;
! 4647: } else if (cur == 'e') {
! 4648: NEXT;
! 4649: /* enclosing */
! 4650: type = XML_REGEXP_MARK_ENCLOSING;
! 4651: } else {
! 4652: /* all marks */
! 4653: type = XML_REGEXP_MARK;
! 4654: }
! 4655: } else if (cur == 'N') {
! 4656: NEXT;
! 4657: cur = CUR;
! 4658: if (cur == 'd') {
! 4659: NEXT;
! 4660: /* digital */
! 4661: type = XML_REGEXP_NUMBER_DECIMAL;
! 4662: } else if (cur == 'l') {
! 4663: NEXT;
! 4664: /* letter */
! 4665: type = XML_REGEXP_NUMBER_LETTER;
! 4666: } else if (cur == 'o') {
! 4667: NEXT;
! 4668: /* other */
! 4669: type = XML_REGEXP_NUMBER_OTHERS;
! 4670: } else {
! 4671: /* all numbers */
! 4672: type = XML_REGEXP_NUMBER;
! 4673: }
! 4674: } else if (cur == 'P') {
! 4675: NEXT;
! 4676: cur = CUR;
! 4677: if (cur == 'c') {
! 4678: NEXT;
! 4679: /* connector */
! 4680: type = XML_REGEXP_PUNCT_CONNECTOR;
! 4681: } else if (cur == 'd') {
! 4682: NEXT;
! 4683: /* dash */
! 4684: type = XML_REGEXP_PUNCT_DASH;
! 4685: } else if (cur == 's') {
! 4686: NEXT;
! 4687: /* open */
! 4688: type = XML_REGEXP_PUNCT_OPEN;
! 4689: } else if (cur == 'e') {
! 4690: NEXT;
! 4691: /* close */
! 4692: type = XML_REGEXP_PUNCT_CLOSE;
! 4693: } else if (cur == 'i') {
! 4694: NEXT;
! 4695: /* initial quote */
! 4696: type = XML_REGEXP_PUNCT_INITQUOTE;
! 4697: } else if (cur == 'f') {
! 4698: NEXT;
! 4699: /* final quote */
! 4700: type = XML_REGEXP_PUNCT_FINQUOTE;
! 4701: } else if (cur == 'o') {
! 4702: NEXT;
! 4703: /* other */
! 4704: type = XML_REGEXP_PUNCT_OTHERS;
! 4705: } else {
! 4706: /* all punctuation */
! 4707: type = XML_REGEXP_PUNCT;
! 4708: }
! 4709: } else if (cur == 'Z') {
! 4710: NEXT;
! 4711: cur = CUR;
! 4712: if (cur == 's') {
! 4713: NEXT;
! 4714: /* space */
! 4715: type = XML_REGEXP_SEPAR_SPACE;
! 4716: } else if (cur == 'l') {
! 4717: NEXT;
! 4718: /* line */
! 4719: type = XML_REGEXP_SEPAR_LINE;
! 4720: } else if (cur == 'p') {
! 4721: NEXT;
! 4722: /* paragraph */
! 4723: type = XML_REGEXP_SEPAR_PARA;
! 4724: } else {
! 4725: /* all separators */
! 4726: type = XML_REGEXP_SEPAR;
! 4727: }
! 4728: } else if (cur == 'S') {
! 4729: NEXT;
! 4730: cur = CUR;
! 4731: if (cur == 'm') {
! 4732: NEXT;
! 4733: type = XML_REGEXP_SYMBOL_MATH;
! 4734: /* math */
! 4735: } else if (cur == 'c') {
! 4736: NEXT;
! 4737: type = XML_REGEXP_SYMBOL_CURRENCY;
! 4738: /* currency */
! 4739: } else if (cur == 'k') {
! 4740: NEXT;
! 4741: type = XML_REGEXP_SYMBOL_MODIFIER;
! 4742: /* modifiers */
! 4743: } else if (cur == 'o') {
! 4744: NEXT;
! 4745: type = XML_REGEXP_SYMBOL_OTHERS;
! 4746: /* other */
! 4747: } else {
! 4748: /* all symbols */
! 4749: type = XML_REGEXP_SYMBOL;
! 4750: }
! 4751: } else if (cur == 'C') {
! 4752: NEXT;
! 4753: cur = CUR;
! 4754: if (cur == 'c') {
! 4755: NEXT;
! 4756: /* control */
! 4757: type = XML_REGEXP_OTHER_CONTROL;
! 4758: } else if (cur == 'f') {
! 4759: NEXT;
! 4760: /* format */
! 4761: type = XML_REGEXP_OTHER_FORMAT;
! 4762: } else if (cur == 'o') {
! 4763: NEXT;
! 4764: /* private use */
! 4765: type = XML_REGEXP_OTHER_PRIVATE;
! 4766: } else if (cur == 'n') {
! 4767: NEXT;
! 4768: /* not assigned */
! 4769: type = XML_REGEXP_OTHER_NA;
! 4770: } else {
! 4771: /* all others */
! 4772: type = XML_REGEXP_OTHER;
! 4773: }
! 4774: } else if (cur == 'I') {
! 4775: const xmlChar *start;
! 4776: NEXT;
! 4777: cur = CUR;
! 4778: if (cur != 's') {
! 4779: ERROR("IsXXXX expected");
! 4780: return;
! 4781: }
! 4782: NEXT;
! 4783: start = ctxt->cur;
! 4784: cur = CUR;
! 4785: if (((cur >= 'a') && (cur <= 'z')) ||
! 4786: ((cur >= 'A') && (cur <= 'Z')) ||
! 4787: ((cur >= '0') && (cur <= '9')) ||
! 4788: (cur == 0x2D)) {
! 4789: NEXT;
! 4790: cur = CUR;
! 4791: while (((cur >= 'a') && (cur <= 'z')) ||
! 4792: ((cur >= 'A') && (cur <= 'Z')) ||
! 4793: ((cur >= '0') && (cur <= '9')) ||
! 4794: (cur == 0x2D)) {
! 4795: NEXT;
! 4796: cur = CUR;
! 4797: }
! 4798: }
! 4799: type = XML_REGEXP_BLOCK_NAME;
! 4800: blockName = xmlStrndup(start, ctxt->cur - start);
! 4801: } else {
! 4802: ERROR("Unknown char property");
! 4803: return;
! 4804: }
! 4805: if (ctxt->atom == NULL) {
! 4806: ctxt->atom = xmlRegNewAtom(ctxt, type);
! 4807: if (ctxt->atom != NULL)
! 4808: ctxt->atom->valuep = blockName;
! 4809: } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
! 4810: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 4811: type, 0, 0, blockName);
! 4812: }
! 4813: }
! 4814:
! 4815: /**
! 4816: * xmlFAParseCharClassEsc:
! 4817: * @ctxt: a regexp parser context
! 4818: *
! 4819: * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
! 4820: * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
! 4821: * [25] catEsc ::= '\p{' charProp '}'
! 4822: * [26] complEsc ::= '\P{' charProp '}'
! 4823: * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
! 4824: */
! 4825: static void
! 4826: xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
! 4827: int cur;
! 4828:
! 4829: if (CUR == '.') {
! 4830: if (ctxt->atom == NULL) {
! 4831: ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
! 4832: } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
! 4833: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 4834: XML_REGEXP_ANYCHAR, 0, 0, NULL);
! 4835: }
! 4836: NEXT;
! 4837: return;
! 4838: }
! 4839: if (CUR != '\\') {
! 4840: ERROR("Escaped sequence: expecting \\");
! 4841: return;
! 4842: }
! 4843: NEXT;
! 4844: cur = CUR;
! 4845: if (cur == 'p') {
! 4846: NEXT;
! 4847: if (CUR != '{') {
! 4848: ERROR("Expecting '{'");
! 4849: return;
! 4850: }
! 4851: NEXT;
! 4852: xmlFAParseCharProp(ctxt);
! 4853: if (CUR != '}') {
! 4854: ERROR("Expecting '}'");
! 4855: return;
! 4856: }
! 4857: NEXT;
! 4858: } else if (cur == 'P') {
! 4859: NEXT;
! 4860: if (CUR != '{') {
! 4861: ERROR("Expecting '{'");
! 4862: return;
! 4863: }
! 4864: NEXT;
! 4865: xmlFAParseCharProp(ctxt);
! 4866: ctxt->atom->neg = 1;
! 4867: if (CUR != '}') {
! 4868: ERROR("Expecting '}'");
! 4869: return;
! 4870: }
! 4871: NEXT;
! 4872: } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
! 4873: (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
! 4874: (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
! 4875: (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
! 4876: (cur == 0x5E)) {
! 4877: if (ctxt->atom == NULL) {
! 4878: ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
! 4879: if (ctxt->atom != NULL) {
! 4880: switch (cur) {
! 4881: case 'n':
! 4882: ctxt->atom->codepoint = '\n';
! 4883: break;
! 4884: case 'r':
! 4885: ctxt->atom->codepoint = '\r';
! 4886: break;
! 4887: case 't':
! 4888: ctxt->atom->codepoint = '\t';
! 4889: break;
! 4890: default:
! 4891: ctxt->atom->codepoint = cur;
! 4892: }
! 4893: }
! 4894: } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
! 4895: switch (cur) {
! 4896: case 'n':
! 4897: cur = '\n';
! 4898: break;
! 4899: case 'r':
! 4900: cur = '\r';
! 4901: break;
! 4902: case 't':
! 4903: cur = '\t';
! 4904: break;
! 4905: }
! 4906: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 4907: XML_REGEXP_CHARVAL, cur, cur, NULL);
! 4908: }
! 4909: NEXT;
! 4910: } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
! 4911: (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
! 4912: (cur == 'w') || (cur == 'W')) {
! 4913: xmlRegAtomType type = XML_REGEXP_ANYSPACE;
! 4914:
! 4915: switch (cur) {
! 4916: case 's':
! 4917: type = XML_REGEXP_ANYSPACE;
! 4918: break;
! 4919: case 'S':
! 4920: type = XML_REGEXP_NOTSPACE;
! 4921: break;
! 4922: case 'i':
! 4923: type = XML_REGEXP_INITNAME;
! 4924: break;
! 4925: case 'I':
! 4926: type = XML_REGEXP_NOTINITNAME;
! 4927: break;
! 4928: case 'c':
! 4929: type = XML_REGEXP_NAMECHAR;
! 4930: break;
! 4931: case 'C':
! 4932: type = XML_REGEXP_NOTNAMECHAR;
! 4933: break;
! 4934: case 'd':
! 4935: type = XML_REGEXP_DECIMAL;
! 4936: break;
! 4937: case 'D':
! 4938: type = XML_REGEXP_NOTDECIMAL;
! 4939: break;
! 4940: case 'w':
! 4941: type = XML_REGEXP_REALCHAR;
! 4942: break;
! 4943: case 'W':
! 4944: type = XML_REGEXP_NOTREALCHAR;
! 4945: break;
! 4946: }
! 4947: NEXT;
! 4948: if (ctxt->atom == NULL) {
! 4949: ctxt->atom = xmlRegNewAtom(ctxt, type);
! 4950: } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
! 4951: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 4952: type, 0, 0, NULL);
! 4953: }
! 4954: } else {
! 4955: ERROR("Wrong escape sequence, misuse of character '\\'");
! 4956: }
! 4957: }
! 4958:
! 4959: /**
! 4960: * xmlFAParseCharRange:
! 4961: * @ctxt: a regexp parser context
! 4962: *
! 4963: * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
! 4964: * [18] seRange ::= charOrEsc '-' charOrEsc
! 4965: * [20] charOrEsc ::= XmlChar | SingleCharEsc
! 4966: * [21] XmlChar ::= [^\#x2D#x5B#x5D]
! 4967: * [22] XmlCharIncDash ::= [^\#x5B#x5D]
! 4968: */
! 4969: static void
! 4970: xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
! 4971: int cur, len;
! 4972: int start = -1;
! 4973: int end = -1;
! 4974:
! 4975: if (CUR == '\0') {
! 4976: ERROR("Expecting ']'");
! 4977: return;
! 4978: }
! 4979:
! 4980: cur = CUR;
! 4981: if (cur == '\\') {
! 4982: NEXT;
! 4983: cur = CUR;
! 4984: switch (cur) {
! 4985: case 'n': start = 0xA; break;
! 4986: case 'r': start = 0xD; break;
! 4987: case 't': start = 0x9; break;
! 4988: case '\\': case '|': case '.': case '-': case '^': case '?':
! 4989: case '*': case '+': case '{': case '}': case '(': case ')':
! 4990: case '[': case ']':
! 4991: start = cur; break;
! 4992: default:
! 4993: ERROR("Invalid escape value");
! 4994: return;
! 4995: }
! 4996: end = start;
! 4997: len = 1;
! 4998: } else if ((cur != 0x5B) && (cur != 0x5D)) {
! 4999: end = start = CUR_SCHAR(ctxt->cur, len);
! 5000: } else {
! 5001: ERROR("Expecting a char range");
! 5002: return;
! 5003: }
! 5004: /*
! 5005: * Since we are "inside" a range, we can assume ctxt->cur is past
! 5006: * the start of ctxt->string, and PREV should be safe
! 5007: */
! 5008: if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
! 5009: NEXTL(len);
! 5010: return;
! 5011: }
! 5012: NEXTL(len);
! 5013: cur = CUR;
! 5014: if ((cur != '-') || (NXT(1) == ']')) {
! 5015: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 5016: XML_REGEXP_CHARVAL, start, end, NULL);
! 5017: return;
! 5018: }
! 5019: NEXT;
! 5020: cur = CUR;
! 5021: if (cur == '\\') {
! 5022: NEXT;
! 5023: cur = CUR;
! 5024: switch (cur) {
! 5025: case 'n': end = 0xA; break;
! 5026: case 'r': end = 0xD; break;
! 5027: case 't': end = 0x9; break;
! 5028: case '\\': case '|': case '.': case '-': case '^': case '?':
! 5029: case '*': case '+': case '{': case '}': case '(': case ')':
! 5030: case '[': case ']':
! 5031: end = cur; break;
! 5032: default:
! 5033: ERROR("Invalid escape value");
! 5034: return;
! 5035: }
! 5036: len = 1;
! 5037: } else if ((cur != 0x5B) && (cur != 0x5D)) {
! 5038: end = CUR_SCHAR(ctxt->cur, len);
! 5039: } else {
! 5040: ERROR("Expecting the end of a char range");
! 5041: return;
! 5042: }
! 5043: NEXTL(len);
! 5044: /* TODO check that the values are acceptable character ranges for XML */
! 5045: if (end < start) {
! 5046: ERROR("End of range is before start of range");
! 5047: } else {
! 5048: xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
! 5049: XML_REGEXP_CHARVAL, start, end, NULL);
! 5050: }
! 5051: return;
! 5052: }
! 5053:
! 5054: /**
! 5055: * xmlFAParsePosCharGroup:
! 5056: * @ctxt: a regexp parser context
! 5057: *
! 5058: * [14] posCharGroup ::= ( charRange | charClassEsc )+
! 5059: */
! 5060: static void
! 5061: xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
! 5062: do {
! 5063: if (CUR == '\\') {
! 5064: xmlFAParseCharClassEsc(ctxt);
! 5065: } else {
! 5066: xmlFAParseCharRange(ctxt);
! 5067: }
! 5068: } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
! 5069: (CUR != 0) && (ctxt->error == 0));
! 5070: }
! 5071:
! 5072: /**
! 5073: * xmlFAParseCharGroup:
! 5074: * @ctxt: a regexp parser context
! 5075: *
! 5076: * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
! 5077: * [15] negCharGroup ::= '^' posCharGroup
! 5078: * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
! 5079: * [12] charClassExpr ::= '[' charGroup ']'
! 5080: */
! 5081: static void
! 5082: xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
! 5083: int n = ctxt->neg;
! 5084: while ((CUR != ']') && (ctxt->error == 0)) {
! 5085: if (CUR == '^') {
! 5086: int neg = ctxt->neg;
! 5087:
! 5088: NEXT;
! 5089: ctxt->neg = !ctxt->neg;
! 5090: xmlFAParsePosCharGroup(ctxt);
! 5091: ctxt->neg = neg;
! 5092: } else if ((CUR == '-') && (NXT(1) == '[')) {
! 5093: int neg = ctxt->neg;
! 5094: ctxt->neg = 2;
! 5095: NEXT; /* eat the '-' */
! 5096: NEXT; /* eat the '[' */
! 5097: xmlFAParseCharGroup(ctxt);
! 5098: if (CUR == ']') {
! 5099: NEXT;
! 5100: } else {
! 5101: ERROR("charClassExpr: ']' expected");
! 5102: break;
! 5103: }
! 5104: ctxt->neg = neg;
! 5105: break;
! 5106: } else if (CUR != ']') {
! 5107: xmlFAParsePosCharGroup(ctxt);
! 5108: }
! 5109: }
! 5110: ctxt->neg = n;
! 5111: }
! 5112:
! 5113: /**
! 5114: * xmlFAParseCharClass:
! 5115: * @ctxt: a regexp parser context
! 5116: *
! 5117: * [11] charClass ::= charClassEsc | charClassExpr
! 5118: * [12] charClassExpr ::= '[' charGroup ']'
! 5119: */
! 5120: static void
! 5121: xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
! 5122: if (CUR == '[') {
! 5123: NEXT;
! 5124: ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
! 5125: if (ctxt->atom == NULL)
! 5126: return;
! 5127: xmlFAParseCharGroup(ctxt);
! 5128: if (CUR == ']') {
! 5129: NEXT;
! 5130: } else {
! 5131: ERROR("xmlFAParseCharClass: ']' expected");
! 5132: }
! 5133: } else {
! 5134: xmlFAParseCharClassEsc(ctxt);
! 5135: }
! 5136: }
! 5137:
! 5138: /**
! 5139: * xmlFAParseQuantExact:
! 5140: * @ctxt: a regexp parser context
! 5141: *
! 5142: * [8] QuantExact ::= [0-9]+
! 5143: *
! 5144: * Returns 0 if success or -1 in case of error
! 5145: */
! 5146: static int
! 5147: xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
! 5148: int ret = 0;
! 5149: int ok = 0;
! 5150:
! 5151: while ((CUR >= '0') && (CUR <= '9')) {
! 5152: ret = ret * 10 + (CUR - '0');
! 5153: ok = 1;
! 5154: NEXT;
! 5155: }
! 5156: if (ok != 1) {
! 5157: return(-1);
! 5158: }
! 5159: return(ret);
! 5160: }
! 5161:
! 5162: /**
! 5163: * xmlFAParseQuantifier:
! 5164: * @ctxt: a regexp parser context
! 5165: *
! 5166: * [4] quantifier ::= [?*+] | ( '{' quantity '}' )
! 5167: * [5] quantity ::= quantRange | quantMin | QuantExact
! 5168: * [6] quantRange ::= QuantExact ',' QuantExact
! 5169: * [7] quantMin ::= QuantExact ','
! 5170: * [8] QuantExact ::= [0-9]+
! 5171: */
! 5172: static int
! 5173: xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
! 5174: int cur;
! 5175:
! 5176: cur = CUR;
! 5177: if ((cur == '?') || (cur == '*') || (cur == '+')) {
! 5178: if (ctxt->atom != NULL) {
! 5179: if (cur == '?')
! 5180: ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
! 5181: else if (cur == '*')
! 5182: ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
! 5183: else if (cur == '+')
! 5184: ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
! 5185: }
! 5186: NEXT;
! 5187: return(1);
! 5188: }
! 5189: if (cur == '{') {
! 5190: int min = 0, max = 0;
! 5191:
! 5192: NEXT;
! 5193: cur = xmlFAParseQuantExact(ctxt);
! 5194: if (cur >= 0)
! 5195: min = cur;
! 5196: if (CUR == ',') {
! 5197: NEXT;
! 5198: if (CUR == '}')
! 5199: max = INT_MAX;
! 5200: else {
! 5201: cur = xmlFAParseQuantExact(ctxt);
! 5202: if (cur >= 0)
! 5203: max = cur;
! 5204: else {
! 5205: ERROR("Improper quantifier");
! 5206: }
! 5207: }
! 5208: }
! 5209: if (CUR == '}') {
! 5210: NEXT;
! 5211: } else {
! 5212: ERROR("Unterminated quantifier");
! 5213: }
! 5214: if (max == 0)
! 5215: max = min;
! 5216: if (ctxt->atom != NULL) {
! 5217: ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
! 5218: ctxt->atom->min = min;
! 5219: ctxt->atom->max = max;
! 5220: }
! 5221: return(1);
! 5222: }
! 5223: return(0);
! 5224: }
! 5225:
! 5226: /**
! 5227: * xmlFAParseAtom:
! 5228: * @ctxt: a regexp parser context
! 5229: *
! 5230: * [9] atom ::= Char | charClass | ( '(' regExp ')' )
! 5231: */
! 5232: static int
! 5233: xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
! 5234: int codepoint, len;
! 5235:
! 5236: codepoint = xmlFAIsChar(ctxt);
! 5237: if (codepoint > 0) {
! 5238: ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
! 5239: if (ctxt->atom == NULL)
! 5240: return(-1);
! 5241: codepoint = CUR_SCHAR(ctxt->cur, len);
! 5242: ctxt->atom->codepoint = codepoint;
! 5243: NEXTL(len);
! 5244: return(1);
! 5245: } else if (CUR == '|') {
! 5246: return(0);
! 5247: } else if (CUR == 0) {
! 5248: return(0);
! 5249: } else if (CUR == ')') {
! 5250: return(0);
! 5251: } else if (CUR == '(') {
! 5252: xmlRegStatePtr start, oldend, start0;
! 5253:
! 5254: NEXT;
! 5255: /*
! 5256: * this extra Epsilon transition is needed if we count with 0 allowed
! 5257: * unfortunately this can't be known at that point
! 5258: */
! 5259: xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
! 5260: start0 = ctxt->state;
! 5261: xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
! 5262: start = ctxt->state;
! 5263: oldend = ctxt->end;
! 5264: ctxt->end = NULL;
! 5265: ctxt->atom = NULL;
! 5266: xmlFAParseRegExp(ctxt, 0);
! 5267: if (CUR == ')') {
! 5268: NEXT;
! 5269: } else {
! 5270: ERROR("xmlFAParseAtom: expecting ')'");
! 5271: }
! 5272: ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
! 5273: if (ctxt->atom == NULL)
! 5274: return(-1);
! 5275: ctxt->atom->start = start;
! 5276: ctxt->atom->start0 = start0;
! 5277: ctxt->atom->stop = ctxt->state;
! 5278: ctxt->end = oldend;
! 5279: return(1);
! 5280: } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
! 5281: xmlFAParseCharClass(ctxt);
! 5282: return(1);
! 5283: }
! 5284: return(0);
! 5285: }
! 5286:
! 5287: /**
! 5288: * xmlFAParsePiece:
! 5289: * @ctxt: a regexp parser context
! 5290: *
! 5291: * [3] piece ::= atom quantifier?
! 5292: */
! 5293: static int
! 5294: xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
! 5295: int ret;
! 5296:
! 5297: ctxt->atom = NULL;
! 5298: ret = xmlFAParseAtom(ctxt);
! 5299: if (ret == 0)
! 5300: return(0);
! 5301: if (ctxt->atom == NULL) {
! 5302: ERROR("internal: no atom generated");
! 5303: }
! 5304: xmlFAParseQuantifier(ctxt);
! 5305: return(1);
! 5306: }
! 5307:
! 5308: /**
! 5309: * xmlFAParseBranch:
! 5310: * @ctxt: a regexp parser context
! 5311: * @to: optional target to the end of the branch
! 5312: *
! 5313: * @to is used to optimize by removing duplicate path in automata
! 5314: * in expressions like (a|b)(c|d)
! 5315: *
! 5316: * [2] branch ::= piece*
! 5317: */
! 5318: static int
! 5319: xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
! 5320: xmlRegStatePtr previous;
! 5321: int ret;
! 5322:
! 5323: previous = ctxt->state;
! 5324: ret = xmlFAParsePiece(ctxt);
! 5325: if (ret != 0) {
! 5326: if (xmlFAGenerateTransitions(ctxt, previous,
! 5327: (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
! 5328: return(-1);
! 5329: previous = ctxt->state;
! 5330: ctxt->atom = NULL;
! 5331: }
! 5332: while ((ret != 0) && (ctxt->error == 0)) {
! 5333: ret = xmlFAParsePiece(ctxt);
! 5334: if (ret != 0) {
! 5335: if (xmlFAGenerateTransitions(ctxt, previous,
! 5336: (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
! 5337: return(-1);
! 5338: previous = ctxt->state;
! 5339: ctxt->atom = NULL;
! 5340: }
! 5341: }
! 5342: return(0);
! 5343: }
! 5344:
! 5345: /**
! 5346: * xmlFAParseRegExp:
! 5347: * @ctxt: a regexp parser context
! 5348: * @top: is this the top-level expression ?
! 5349: *
! 5350: * [1] regExp ::= branch ( '|' branch )*
! 5351: */
! 5352: static void
! 5353: xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
! 5354: xmlRegStatePtr start, end;
! 5355:
! 5356: /* if not top start should have been generated by an epsilon trans */
! 5357: start = ctxt->state;
! 5358: ctxt->end = NULL;
! 5359: xmlFAParseBranch(ctxt, NULL);
! 5360: if (top) {
! 5361: #ifdef DEBUG_REGEXP_GRAPH
! 5362: printf("State %d is final\n", ctxt->state->no);
! 5363: #endif
! 5364: ctxt->state->type = XML_REGEXP_FINAL_STATE;
! 5365: }
! 5366: if (CUR != '|') {
! 5367: ctxt->end = ctxt->state;
! 5368: return;
! 5369: }
! 5370: end = ctxt->state;
! 5371: while ((CUR == '|') && (ctxt->error == 0)) {
! 5372: NEXT;
! 5373: ctxt->state = start;
! 5374: ctxt->end = NULL;
! 5375: xmlFAParseBranch(ctxt, end);
! 5376: }
! 5377: if (!top) {
! 5378: ctxt->state = end;
! 5379: ctxt->end = end;
! 5380: }
! 5381: }
! 5382:
! 5383: /************************************************************************
! 5384: * *
! 5385: * The basic API *
! 5386: * *
! 5387: ************************************************************************/
! 5388:
! 5389: /**
! 5390: * xmlRegexpPrint:
! 5391: * @output: the file for the output debug
! 5392: * @regexp: the compiled regexp
! 5393: *
! 5394: * Print the content of the compiled regular expression
! 5395: */
! 5396: void
! 5397: xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
! 5398: int i;
! 5399:
! 5400: if (output == NULL)
! 5401: return;
! 5402: fprintf(output, " regexp: ");
! 5403: if (regexp == NULL) {
! 5404: fprintf(output, "NULL\n");
! 5405: return;
! 5406: }
! 5407: fprintf(output, "'%s' ", regexp->string);
! 5408: fprintf(output, "\n");
! 5409: fprintf(output, "%d atoms:\n", regexp->nbAtoms);
! 5410: for (i = 0;i < regexp->nbAtoms; i++) {
! 5411: fprintf(output, " %02d ", i);
! 5412: xmlRegPrintAtom(output, regexp->atoms[i]);
! 5413: }
! 5414: fprintf(output, "%d states:", regexp->nbStates);
! 5415: fprintf(output, "\n");
! 5416: for (i = 0;i < regexp->nbStates; i++) {
! 5417: xmlRegPrintState(output, regexp->states[i]);
! 5418: }
! 5419: fprintf(output, "%d counters:\n", regexp->nbCounters);
! 5420: for (i = 0;i < regexp->nbCounters; i++) {
! 5421: fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
! 5422: regexp->counters[i].max);
! 5423: }
! 5424: }
! 5425:
! 5426: /**
! 5427: * xmlRegexpCompile:
! 5428: * @regexp: a regular expression string
! 5429: *
! 5430: * Parses a regular expression conforming to XML Schemas Part 2 Datatype
! 5431: * Appendix F and builds an automata suitable for testing strings against
! 5432: * that regular expression
! 5433: *
! 5434: * Returns the compiled expression or NULL in case of error
! 5435: */
! 5436: xmlRegexpPtr
! 5437: xmlRegexpCompile(const xmlChar *regexp) {
! 5438: xmlRegexpPtr ret;
! 5439: xmlRegParserCtxtPtr ctxt;
! 5440:
! 5441: ctxt = xmlRegNewParserCtxt(regexp);
! 5442: if (ctxt == NULL)
! 5443: return(NULL);
! 5444:
! 5445: /* initialize the parser */
! 5446: ctxt->end = NULL;
! 5447: ctxt->start = ctxt->state = xmlRegNewState(ctxt);
! 5448: xmlRegStatePush(ctxt, ctxt->start);
! 5449:
! 5450: /* parse the expression building an automata */
! 5451: xmlFAParseRegExp(ctxt, 1);
! 5452: if (CUR != 0) {
! 5453: ERROR("xmlFAParseRegExp: extra characters");
! 5454: }
! 5455: if (ctxt->error != 0) {
! 5456: xmlRegFreeParserCtxt(ctxt);
! 5457: return(NULL);
! 5458: }
! 5459: ctxt->end = ctxt->state;
! 5460: ctxt->start->type = XML_REGEXP_START_STATE;
! 5461: ctxt->end->type = XML_REGEXP_FINAL_STATE;
! 5462:
! 5463: /* remove the Epsilon except for counted transitions */
! 5464: xmlFAEliminateEpsilonTransitions(ctxt);
! 5465:
! 5466:
! 5467: if (ctxt->error != 0) {
! 5468: xmlRegFreeParserCtxt(ctxt);
! 5469: return(NULL);
! 5470: }
! 5471: ret = xmlRegEpxFromParse(ctxt);
! 5472: xmlRegFreeParserCtxt(ctxt);
! 5473: return(ret);
! 5474: }
! 5475:
! 5476: /**
! 5477: * xmlRegexpExec:
! 5478: * @comp: the compiled regular expression
! 5479: * @content: the value to check against the regular expression
! 5480: *
! 5481: * Check if the regular expression generates the value
! 5482: *
! 5483: * Returns 1 if it matches, 0 if not and a negative value in case of error
! 5484: */
! 5485: int
! 5486: xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
! 5487: if ((comp == NULL) || (content == NULL))
! 5488: return(-1);
! 5489: return(xmlFARegExec(comp, content));
! 5490: }
! 5491:
! 5492: /**
! 5493: * xmlRegexpIsDeterminist:
! 5494: * @comp: the compiled regular expression
! 5495: *
! 5496: * Check if the regular expression is determinist
! 5497: *
! 5498: * Returns 1 if it yes, 0 if not and a negative value in case of error
! 5499: */
! 5500: int
! 5501: xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
! 5502: xmlAutomataPtr am;
! 5503: int ret;
! 5504:
! 5505: if (comp == NULL)
! 5506: return(-1);
! 5507: if (comp->determinist != -1)
! 5508: return(comp->determinist);
! 5509:
! 5510: am = xmlNewAutomata();
! 5511: if (am->states != NULL) {
! 5512: int i;
! 5513:
! 5514: for (i = 0;i < am->nbStates;i++)
! 5515: xmlRegFreeState(am->states[i]);
! 5516: xmlFree(am->states);
! 5517: }
! 5518: am->nbAtoms = comp->nbAtoms;
! 5519: am->atoms = comp->atoms;
! 5520: am->nbStates = comp->nbStates;
! 5521: am->states = comp->states;
! 5522: am->determinist = -1;
! 5523: am->flags = comp->flags;
! 5524: ret = xmlFAComputesDeterminism(am);
! 5525: am->atoms = NULL;
! 5526: am->states = NULL;
! 5527: xmlFreeAutomata(am);
! 5528: comp->determinist = ret;
! 5529: return(ret);
! 5530: }
! 5531:
! 5532: /**
! 5533: * xmlRegFreeRegexp:
! 5534: * @regexp: the regexp
! 5535: *
! 5536: * Free a regexp
! 5537: */
! 5538: void
! 5539: xmlRegFreeRegexp(xmlRegexpPtr regexp) {
! 5540: int i;
! 5541: if (regexp == NULL)
! 5542: return;
! 5543:
! 5544: if (regexp->string != NULL)
! 5545: xmlFree(regexp->string);
! 5546: if (regexp->states != NULL) {
! 5547: for (i = 0;i < regexp->nbStates;i++)
! 5548: xmlRegFreeState(regexp->states[i]);
! 5549: xmlFree(regexp->states);
! 5550: }
! 5551: if (regexp->atoms != NULL) {
! 5552: for (i = 0;i < regexp->nbAtoms;i++)
! 5553: xmlRegFreeAtom(regexp->atoms[i]);
! 5554: xmlFree(regexp->atoms);
! 5555: }
! 5556: if (regexp->counters != NULL)
! 5557: xmlFree(regexp->counters);
! 5558: if (regexp->compact != NULL)
! 5559: xmlFree(regexp->compact);
! 5560: if (regexp->transdata != NULL)
! 5561: xmlFree(regexp->transdata);
! 5562: if (regexp->stringMap != NULL) {
! 5563: for (i = 0; i < regexp->nbstrings;i++)
! 5564: xmlFree(regexp->stringMap[i]);
! 5565: xmlFree(regexp->stringMap);
! 5566: }
! 5567:
! 5568: xmlFree(regexp);
! 5569: }
! 5570:
! 5571: #ifdef LIBXML_AUTOMATA_ENABLED
! 5572: /************************************************************************
! 5573: * *
! 5574: * The Automata interface *
! 5575: * *
! 5576: ************************************************************************/
! 5577:
! 5578: /**
! 5579: * xmlNewAutomata:
! 5580: *
! 5581: * Create a new automata
! 5582: *
! 5583: * Returns the new object or NULL in case of failure
! 5584: */
! 5585: xmlAutomataPtr
! 5586: xmlNewAutomata(void) {
! 5587: xmlAutomataPtr ctxt;
! 5588:
! 5589: ctxt = xmlRegNewParserCtxt(NULL);
! 5590: if (ctxt == NULL)
! 5591: return(NULL);
! 5592:
! 5593: /* initialize the parser */
! 5594: ctxt->end = NULL;
! 5595: ctxt->start = ctxt->state = xmlRegNewState(ctxt);
! 5596: if (ctxt->start == NULL) {
! 5597: xmlFreeAutomata(ctxt);
! 5598: return(NULL);
! 5599: }
! 5600: ctxt->start->type = XML_REGEXP_START_STATE;
! 5601: if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
! 5602: xmlRegFreeState(ctxt->start);
! 5603: xmlFreeAutomata(ctxt);
! 5604: return(NULL);
! 5605: }
! 5606: ctxt->flags = 0;
! 5607:
! 5608: return(ctxt);
! 5609: }
! 5610:
! 5611: /**
! 5612: * xmlFreeAutomata:
! 5613: * @am: an automata
! 5614: *
! 5615: * Free an automata
! 5616: */
! 5617: void
! 5618: xmlFreeAutomata(xmlAutomataPtr am) {
! 5619: if (am == NULL)
! 5620: return;
! 5621: xmlRegFreeParserCtxt(am);
! 5622: }
! 5623:
! 5624: /**
! 5625: * xmlAutomataSetFlags:
! 5626: * @am: an automata
! 5627: * @flags: a set of internal flags
! 5628: *
! 5629: * Set some flags on the automata
! 5630: */
! 5631: void
! 5632: xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
! 5633: if (am == NULL)
! 5634: return;
! 5635: am->flags |= flags;
! 5636: }
! 5637:
! 5638: /**
! 5639: * xmlAutomataGetInitState:
! 5640: * @am: an automata
! 5641: *
! 5642: * Initial state lookup
! 5643: *
! 5644: * Returns the initial state of the automata
! 5645: */
! 5646: xmlAutomataStatePtr
! 5647: xmlAutomataGetInitState(xmlAutomataPtr am) {
! 5648: if (am == NULL)
! 5649: return(NULL);
! 5650: return(am->start);
! 5651: }
! 5652:
! 5653: /**
! 5654: * xmlAutomataSetFinalState:
! 5655: * @am: an automata
! 5656: * @state: a state in this automata
! 5657: *
! 5658: * Makes that state a final state
! 5659: *
! 5660: * Returns 0 or -1 in case of error
! 5661: */
! 5662: int
! 5663: xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
! 5664: if ((am == NULL) || (state == NULL))
! 5665: return(-1);
! 5666: state->type = XML_REGEXP_FINAL_STATE;
! 5667: return(0);
! 5668: }
! 5669:
! 5670: /**
! 5671: * xmlAutomataNewTransition:
! 5672: * @am: an automata
! 5673: * @from: the starting point of the transition
! 5674: * @to: the target point of the transition or NULL
! 5675: * @token: the input string associated to that transition
! 5676: * @data: data passed to the callback function if the transition is activated
! 5677: *
! 5678: * If @to is NULL, this creates first a new target state in the automata
! 5679: * and then adds a transition from the @from state to the target state
! 5680: * activated by the value of @token
! 5681: *
! 5682: * Returns the target state or NULL in case of error
! 5683: */
! 5684: xmlAutomataStatePtr
! 5685: xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 5686: xmlAutomataStatePtr to, const xmlChar *token,
! 5687: void *data) {
! 5688: xmlRegAtomPtr atom;
! 5689:
! 5690: if ((am == NULL) || (from == NULL) || (token == NULL))
! 5691: return(NULL);
! 5692: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 5693: if (atom == NULL)
! 5694: return(NULL);
! 5695: atom->data = data;
! 5696: if (atom == NULL)
! 5697: return(NULL);
! 5698: atom->valuep = xmlStrdup(token);
! 5699:
! 5700: if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
! 5701: xmlRegFreeAtom(atom);
! 5702: return(NULL);
! 5703: }
! 5704: if (to == NULL)
! 5705: return(am->state);
! 5706: return(to);
! 5707: }
! 5708:
! 5709: /**
! 5710: * xmlAutomataNewTransition2:
! 5711: * @am: an automata
! 5712: * @from: the starting point of the transition
! 5713: * @to: the target point of the transition or NULL
! 5714: * @token: the first input string associated to that transition
! 5715: * @token2: the second input string associated to that transition
! 5716: * @data: data passed to the callback function if the transition is activated
! 5717: *
! 5718: * If @to is NULL, this creates first a new target state in the automata
! 5719: * and then adds a transition from the @from state to the target state
! 5720: * activated by the value of @token
! 5721: *
! 5722: * Returns the target state or NULL in case of error
! 5723: */
! 5724: xmlAutomataStatePtr
! 5725: xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 5726: xmlAutomataStatePtr to, const xmlChar *token,
! 5727: const xmlChar *token2, void *data) {
! 5728: xmlRegAtomPtr atom;
! 5729:
! 5730: if ((am == NULL) || (from == NULL) || (token == NULL))
! 5731: return(NULL);
! 5732: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 5733: if (atom == NULL)
! 5734: return(NULL);
! 5735: atom->data = data;
! 5736: if ((token2 == NULL) || (*token2 == 0)) {
! 5737: atom->valuep = xmlStrdup(token);
! 5738: } else {
! 5739: int lenn, lenp;
! 5740: xmlChar *str;
! 5741:
! 5742: lenn = strlen((char *) token2);
! 5743: lenp = strlen((char *) token);
! 5744:
! 5745: str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
! 5746: if (str == NULL) {
! 5747: xmlRegFreeAtom(atom);
! 5748: return(NULL);
! 5749: }
! 5750: memcpy(&str[0], token, lenp);
! 5751: str[lenp] = '|';
! 5752: memcpy(&str[lenp + 1], token2, lenn);
! 5753: str[lenn + lenp + 1] = 0;
! 5754:
! 5755: atom->valuep = str;
! 5756: }
! 5757:
! 5758: if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
! 5759: xmlRegFreeAtom(atom);
! 5760: return(NULL);
! 5761: }
! 5762: if (to == NULL)
! 5763: return(am->state);
! 5764: return(to);
! 5765: }
! 5766:
! 5767: /**
! 5768: * xmlAutomataNewNegTrans:
! 5769: * @am: an automata
! 5770: * @from: the starting point of the transition
! 5771: * @to: the target point of the transition or NULL
! 5772: * @token: the first input string associated to that transition
! 5773: * @token2: the second input string associated to that transition
! 5774: * @data: data passed to the callback function if the transition is activated
! 5775: *
! 5776: * If @to is NULL, this creates first a new target state in the automata
! 5777: * and then adds a transition from the @from state to the target state
! 5778: * activated by any value except (@token,@token2)
! 5779: * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
! 5780: # the semantic of XSD ##other
! 5781: *
! 5782: * Returns the target state or NULL in case of error
! 5783: */
! 5784: xmlAutomataStatePtr
! 5785: xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 5786: xmlAutomataStatePtr to, const xmlChar *token,
! 5787: const xmlChar *token2, void *data) {
! 5788: xmlRegAtomPtr atom;
! 5789: xmlChar err_msg[200];
! 5790:
! 5791: if ((am == NULL) || (from == NULL) || (token == NULL))
! 5792: return(NULL);
! 5793: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 5794: if (atom == NULL)
! 5795: return(NULL);
! 5796: atom->data = data;
! 5797: atom->neg = 1;
! 5798: if ((token2 == NULL) || (*token2 == 0)) {
! 5799: atom->valuep = xmlStrdup(token);
! 5800: } else {
! 5801: int lenn, lenp;
! 5802: xmlChar *str;
! 5803:
! 5804: lenn = strlen((char *) token2);
! 5805: lenp = strlen((char *) token);
! 5806:
! 5807: str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
! 5808: if (str == NULL) {
! 5809: xmlRegFreeAtom(atom);
! 5810: return(NULL);
! 5811: }
! 5812: memcpy(&str[0], token, lenp);
! 5813: str[lenp] = '|';
! 5814: memcpy(&str[lenp + 1], token2, lenn);
! 5815: str[lenn + lenp + 1] = 0;
! 5816:
! 5817: atom->valuep = str;
! 5818: }
! 5819: snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
! 5820: err_msg[199] = 0;
! 5821: atom->valuep2 = xmlStrdup(err_msg);
! 5822:
! 5823: if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
! 5824: xmlRegFreeAtom(atom);
! 5825: return(NULL);
! 5826: }
! 5827: am->negs++;
! 5828: if (to == NULL)
! 5829: return(am->state);
! 5830: return(to);
! 5831: }
! 5832:
! 5833: /**
! 5834: * xmlAutomataNewCountTrans2:
! 5835: * @am: an automata
! 5836: * @from: the starting point of the transition
! 5837: * @to: the target point of the transition or NULL
! 5838: * @token: the input string associated to that transition
! 5839: * @token2: the second input string associated to that transition
! 5840: * @min: the minimum successive occurences of token
! 5841: * @max: the maximum successive occurences of token
! 5842: * @data: data associated to the transition
! 5843: *
! 5844: * If @to is NULL, this creates first a new target state in the automata
! 5845: * and then adds a transition from the @from state to the target state
! 5846: * activated by a succession of input of value @token and @token2 and
! 5847: * whose number is between @min and @max
! 5848: *
! 5849: * Returns the target state or NULL in case of error
! 5850: */
! 5851: xmlAutomataStatePtr
! 5852: xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 5853: xmlAutomataStatePtr to, const xmlChar *token,
! 5854: const xmlChar *token2,
! 5855: int min, int max, void *data) {
! 5856: xmlRegAtomPtr atom;
! 5857: int counter;
! 5858:
! 5859: if ((am == NULL) || (from == NULL) || (token == NULL))
! 5860: return(NULL);
! 5861: if (min < 0)
! 5862: return(NULL);
! 5863: if ((max < min) || (max < 1))
! 5864: return(NULL);
! 5865: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 5866: if (atom == NULL)
! 5867: return(NULL);
! 5868: if ((token2 == NULL) || (*token2 == 0)) {
! 5869: atom->valuep = xmlStrdup(token);
! 5870: } else {
! 5871: int lenn, lenp;
! 5872: xmlChar *str;
! 5873:
! 5874: lenn = strlen((char *) token2);
! 5875: lenp = strlen((char *) token);
! 5876:
! 5877: str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
! 5878: if (str == NULL) {
! 5879: xmlRegFreeAtom(atom);
! 5880: return(NULL);
! 5881: }
! 5882: memcpy(&str[0], token, lenp);
! 5883: str[lenp] = '|';
! 5884: memcpy(&str[lenp + 1], token2, lenn);
! 5885: str[lenn + lenp + 1] = 0;
! 5886:
! 5887: atom->valuep = str;
! 5888: }
! 5889: atom->data = data;
! 5890: if (min == 0)
! 5891: atom->min = 1;
! 5892: else
! 5893: atom->min = min;
! 5894: atom->max = max;
! 5895:
! 5896: /*
! 5897: * associate a counter to the transition.
! 5898: */
! 5899: counter = xmlRegGetCounter(am);
! 5900: am->counters[counter].min = min;
! 5901: am->counters[counter].max = max;
! 5902:
! 5903: /* xmlFAGenerateTransitions(am, from, to, atom); */
! 5904: if (to == NULL) {
! 5905: to = xmlRegNewState(am);
! 5906: xmlRegStatePush(am, to);
! 5907: }
! 5908: xmlRegStateAddTrans(am, from, atom, to, counter, -1);
! 5909: xmlRegAtomPush(am, atom);
! 5910: am->state = to;
! 5911:
! 5912: if (to == NULL)
! 5913: to = am->state;
! 5914: if (to == NULL)
! 5915: return(NULL);
! 5916: if (min == 0)
! 5917: xmlFAGenerateEpsilonTransition(am, from, to);
! 5918: return(to);
! 5919: }
! 5920:
! 5921: /**
! 5922: * xmlAutomataNewCountTrans:
! 5923: * @am: an automata
! 5924: * @from: the starting point of the transition
! 5925: * @to: the target point of the transition or NULL
! 5926: * @token: the input string associated to that transition
! 5927: * @min: the minimum successive occurences of token
! 5928: * @max: the maximum successive occurences of token
! 5929: * @data: data associated to the transition
! 5930: *
! 5931: * If @to is NULL, this creates first a new target state in the automata
! 5932: * and then adds a transition from the @from state to the target state
! 5933: * activated by a succession of input of value @token and whose number
! 5934: * is between @min and @max
! 5935: *
! 5936: * Returns the target state or NULL in case of error
! 5937: */
! 5938: xmlAutomataStatePtr
! 5939: xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 5940: xmlAutomataStatePtr to, const xmlChar *token,
! 5941: int min, int max, void *data) {
! 5942: xmlRegAtomPtr atom;
! 5943: int counter;
! 5944:
! 5945: if ((am == NULL) || (from == NULL) || (token == NULL))
! 5946: return(NULL);
! 5947: if (min < 0)
! 5948: return(NULL);
! 5949: if ((max < min) || (max < 1))
! 5950: return(NULL);
! 5951: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 5952: if (atom == NULL)
! 5953: return(NULL);
! 5954: atom->valuep = xmlStrdup(token);
! 5955: atom->data = data;
! 5956: if (min == 0)
! 5957: atom->min = 1;
! 5958: else
! 5959: atom->min = min;
! 5960: atom->max = max;
! 5961:
! 5962: /*
! 5963: * associate a counter to the transition.
! 5964: */
! 5965: counter = xmlRegGetCounter(am);
! 5966: am->counters[counter].min = min;
! 5967: am->counters[counter].max = max;
! 5968:
! 5969: /* xmlFAGenerateTransitions(am, from, to, atom); */
! 5970: if (to == NULL) {
! 5971: to = xmlRegNewState(am);
! 5972: xmlRegStatePush(am, to);
! 5973: }
! 5974: xmlRegStateAddTrans(am, from, atom, to, counter, -1);
! 5975: xmlRegAtomPush(am, atom);
! 5976: am->state = to;
! 5977:
! 5978: if (to == NULL)
! 5979: to = am->state;
! 5980: if (to == NULL)
! 5981: return(NULL);
! 5982: if (min == 0)
! 5983: xmlFAGenerateEpsilonTransition(am, from, to);
! 5984: return(to);
! 5985: }
! 5986:
! 5987: /**
! 5988: * xmlAutomataNewOnceTrans2:
! 5989: * @am: an automata
! 5990: * @from: the starting point of the transition
! 5991: * @to: the target point of the transition or NULL
! 5992: * @token: the input string associated to that transition
! 5993: * @token2: the second input string associated to that transition
! 5994: * @min: the minimum successive occurences of token
! 5995: * @max: the maximum successive occurences of token
! 5996: * @data: data associated to the transition
! 5997: *
! 5998: * If @to is NULL, this creates first a new target state in the automata
! 5999: * and then adds a transition from the @from state to the target state
! 6000: * activated by a succession of input of value @token and @token2 and whose
! 6001: * number is between @min and @max, moreover that transition can only be
! 6002: * crossed once.
! 6003: *
! 6004: * Returns the target state or NULL in case of error
! 6005: */
! 6006: xmlAutomataStatePtr
! 6007: xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6008: xmlAutomataStatePtr to, const xmlChar *token,
! 6009: const xmlChar *token2,
! 6010: int min, int max, void *data) {
! 6011: xmlRegAtomPtr atom;
! 6012: int counter;
! 6013:
! 6014: if ((am == NULL) || (from == NULL) || (token == NULL))
! 6015: return(NULL);
! 6016: if (min < 1)
! 6017: return(NULL);
! 6018: if ((max < min) || (max < 1))
! 6019: return(NULL);
! 6020: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 6021: if (atom == NULL)
! 6022: return(NULL);
! 6023: if ((token2 == NULL) || (*token2 == 0)) {
! 6024: atom->valuep = xmlStrdup(token);
! 6025: } else {
! 6026: int lenn, lenp;
! 6027: xmlChar *str;
! 6028:
! 6029: lenn = strlen((char *) token2);
! 6030: lenp = strlen((char *) token);
! 6031:
! 6032: str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
! 6033: if (str == NULL) {
! 6034: xmlRegFreeAtom(atom);
! 6035: return(NULL);
! 6036: }
! 6037: memcpy(&str[0], token, lenp);
! 6038: str[lenp] = '|';
! 6039: memcpy(&str[lenp + 1], token2, lenn);
! 6040: str[lenn + lenp + 1] = 0;
! 6041:
! 6042: atom->valuep = str;
! 6043: }
! 6044: atom->data = data;
! 6045: atom->quant = XML_REGEXP_QUANT_ONCEONLY;
! 6046: atom->min = min;
! 6047: atom->max = max;
! 6048: /*
! 6049: * associate a counter to the transition.
! 6050: */
! 6051: counter = xmlRegGetCounter(am);
! 6052: am->counters[counter].min = 1;
! 6053: am->counters[counter].max = 1;
! 6054:
! 6055: /* xmlFAGenerateTransitions(am, from, to, atom); */
! 6056: if (to == NULL) {
! 6057: to = xmlRegNewState(am);
! 6058: xmlRegStatePush(am, to);
! 6059: }
! 6060: xmlRegStateAddTrans(am, from, atom, to, counter, -1);
! 6061: xmlRegAtomPush(am, atom);
! 6062: am->state = to;
! 6063: return(to);
! 6064: }
! 6065:
! 6066:
! 6067:
! 6068: /**
! 6069: * xmlAutomataNewOnceTrans:
! 6070: * @am: an automata
! 6071: * @from: the starting point of the transition
! 6072: * @to: the target point of the transition or NULL
! 6073: * @token: the input string associated to that transition
! 6074: * @min: the minimum successive occurences of token
! 6075: * @max: the maximum successive occurences of token
! 6076: * @data: data associated to the transition
! 6077: *
! 6078: * If @to is NULL, this creates first a new target state in the automata
! 6079: * and then adds a transition from the @from state to the target state
! 6080: * activated by a succession of input of value @token and whose number
! 6081: * is between @min and @max, moreover that transition can only be crossed
! 6082: * once.
! 6083: *
! 6084: * Returns the target state or NULL in case of error
! 6085: */
! 6086: xmlAutomataStatePtr
! 6087: xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6088: xmlAutomataStatePtr to, const xmlChar *token,
! 6089: int min, int max, void *data) {
! 6090: xmlRegAtomPtr atom;
! 6091: int counter;
! 6092:
! 6093: if ((am == NULL) || (from == NULL) || (token == NULL))
! 6094: return(NULL);
! 6095: if (min < 1)
! 6096: return(NULL);
! 6097: if ((max < min) || (max < 1))
! 6098: return(NULL);
! 6099: atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
! 6100: if (atom == NULL)
! 6101: return(NULL);
! 6102: atom->valuep = xmlStrdup(token);
! 6103: atom->data = data;
! 6104: atom->quant = XML_REGEXP_QUANT_ONCEONLY;
! 6105: atom->min = min;
! 6106: atom->max = max;
! 6107: /*
! 6108: * associate a counter to the transition.
! 6109: */
! 6110: counter = xmlRegGetCounter(am);
! 6111: am->counters[counter].min = 1;
! 6112: am->counters[counter].max = 1;
! 6113:
! 6114: /* xmlFAGenerateTransitions(am, from, to, atom); */
! 6115: if (to == NULL) {
! 6116: to = xmlRegNewState(am);
! 6117: xmlRegStatePush(am, to);
! 6118: }
! 6119: xmlRegStateAddTrans(am, from, atom, to, counter, -1);
! 6120: xmlRegAtomPush(am, atom);
! 6121: am->state = to;
! 6122: return(to);
! 6123: }
! 6124:
! 6125: /**
! 6126: * xmlAutomataNewState:
! 6127: * @am: an automata
! 6128: *
! 6129: * Create a new disconnected state in the automata
! 6130: *
! 6131: * Returns the new state or NULL in case of error
! 6132: */
! 6133: xmlAutomataStatePtr
! 6134: xmlAutomataNewState(xmlAutomataPtr am) {
! 6135: xmlAutomataStatePtr to;
! 6136:
! 6137: if (am == NULL)
! 6138: return(NULL);
! 6139: to = xmlRegNewState(am);
! 6140: xmlRegStatePush(am, to);
! 6141: return(to);
! 6142: }
! 6143:
! 6144: /**
! 6145: * xmlAutomataNewEpsilon:
! 6146: * @am: an automata
! 6147: * @from: the starting point of the transition
! 6148: * @to: the target point of the transition or NULL
! 6149: *
! 6150: * If @to is NULL, this creates first a new target state in the automata
! 6151: * and then adds an epsilon transition from the @from state to the
! 6152: * target state
! 6153: *
! 6154: * Returns the target state or NULL in case of error
! 6155: */
! 6156: xmlAutomataStatePtr
! 6157: xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6158: xmlAutomataStatePtr to) {
! 6159: if ((am == NULL) || (from == NULL))
! 6160: return(NULL);
! 6161: xmlFAGenerateEpsilonTransition(am, from, to);
! 6162: if (to == NULL)
! 6163: return(am->state);
! 6164: return(to);
! 6165: }
! 6166:
! 6167: /**
! 6168: * xmlAutomataNewAllTrans:
! 6169: * @am: an automata
! 6170: * @from: the starting point of the transition
! 6171: * @to: the target point of the transition or NULL
! 6172: * @lax: allow to transition if not all all transitions have been activated
! 6173: *
! 6174: * If @to is NULL, this creates first a new target state in the automata
! 6175: * and then adds a an ALL transition from the @from state to the
! 6176: * target state. That transition is an epsilon transition allowed only when
! 6177: * all transitions from the @from node have been activated.
! 6178: *
! 6179: * Returns the target state or NULL in case of error
! 6180: */
! 6181: xmlAutomataStatePtr
! 6182: xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6183: xmlAutomataStatePtr to, int lax) {
! 6184: if ((am == NULL) || (from == NULL))
! 6185: return(NULL);
! 6186: xmlFAGenerateAllTransition(am, from, to, lax);
! 6187: if (to == NULL)
! 6188: return(am->state);
! 6189: return(to);
! 6190: }
! 6191:
! 6192: /**
! 6193: * xmlAutomataNewCounter:
! 6194: * @am: an automata
! 6195: * @min: the minimal value on the counter
! 6196: * @max: the maximal value on the counter
! 6197: *
! 6198: * Create a new counter
! 6199: *
! 6200: * Returns the counter number or -1 in case of error
! 6201: */
! 6202: int
! 6203: xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
! 6204: int ret;
! 6205:
! 6206: if (am == NULL)
! 6207: return(-1);
! 6208:
! 6209: ret = xmlRegGetCounter(am);
! 6210: if (ret < 0)
! 6211: return(-1);
! 6212: am->counters[ret].min = min;
! 6213: am->counters[ret].max = max;
! 6214: return(ret);
! 6215: }
! 6216:
! 6217: /**
! 6218: * xmlAutomataNewCountedTrans:
! 6219: * @am: an automata
! 6220: * @from: the starting point of the transition
! 6221: * @to: the target point of the transition or NULL
! 6222: * @counter: the counter associated to that transition
! 6223: *
! 6224: * If @to is NULL, this creates first a new target state in the automata
! 6225: * and then adds an epsilon transition from the @from state to the target state
! 6226: * which will increment the counter provided
! 6227: *
! 6228: * Returns the target state or NULL in case of error
! 6229: */
! 6230: xmlAutomataStatePtr
! 6231: xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6232: xmlAutomataStatePtr to, int counter) {
! 6233: if ((am == NULL) || (from == NULL) || (counter < 0))
! 6234: return(NULL);
! 6235: xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
! 6236: if (to == NULL)
! 6237: return(am->state);
! 6238: return(to);
! 6239: }
! 6240:
! 6241: /**
! 6242: * xmlAutomataNewCounterTrans:
! 6243: * @am: an automata
! 6244: * @from: the starting point of the transition
! 6245: * @to: the target point of the transition or NULL
! 6246: * @counter: the counter associated to that transition
! 6247: *
! 6248: * If @to is NULL, this creates first a new target state in the automata
! 6249: * and then adds an epsilon transition from the @from state to the target state
! 6250: * which will be allowed only if the counter is within the right range.
! 6251: *
! 6252: * Returns the target state or NULL in case of error
! 6253: */
! 6254: xmlAutomataStatePtr
! 6255: xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
! 6256: xmlAutomataStatePtr to, int counter) {
! 6257: if ((am == NULL) || (from == NULL) || (counter < 0))
! 6258: return(NULL);
! 6259: xmlFAGenerateCountedTransition(am, from, to, counter);
! 6260: if (to == NULL)
! 6261: return(am->state);
! 6262: return(to);
! 6263: }
! 6264:
! 6265: /**
! 6266: * xmlAutomataCompile:
! 6267: * @am: an automata
! 6268: *
! 6269: * Compile the automata into a Reg Exp ready for being executed.
! 6270: * The automata should be free after this point.
! 6271: *
! 6272: * Returns the compiled regexp or NULL in case of error
! 6273: */
! 6274: xmlRegexpPtr
! 6275: xmlAutomataCompile(xmlAutomataPtr am) {
! 6276: xmlRegexpPtr ret;
! 6277:
! 6278: if ((am == NULL) || (am->error != 0)) return(NULL);
! 6279: xmlFAEliminateEpsilonTransitions(am);
! 6280: /* xmlFAComputesDeterminism(am); */
! 6281: ret = xmlRegEpxFromParse(am);
! 6282:
! 6283: return(ret);
! 6284: }
! 6285:
! 6286: /**
! 6287: * xmlAutomataIsDeterminist:
! 6288: * @am: an automata
! 6289: *
! 6290: * Checks if an automata is determinist.
! 6291: *
! 6292: * Returns 1 if true, 0 if not, and -1 in case of error
! 6293: */
! 6294: int
! 6295: xmlAutomataIsDeterminist(xmlAutomataPtr am) {
! 6296: int ret;
! 6297:
! 6298: if (am == NULL)
! 6299: return(-1);
! 6300:
! 6301: ret = xmlFAComputesDeterminism(am);
! 6302: return(ret);
! 6303: }
! 6304: #endif /* LIBXML_AUTOMATA_ENABLED */
! 6305:
! 6306: #ifdef LIBXML_EXPR_ENABLED
! 6307: /************************************************************************
! 6308: * *
! 6309: * Formal Expression handling code *
! 6310: * *
! 6311: ************************************************************************/
! 6312: /************************************************************************
! 6313: * *
! 6314: * Expression handling context *
! 6315: * *
! 6316: ************************************************************************/
! 6317:
! 6318: struct _xmlExpCtxt {
! 6319: xmlDictPtr dict;
! 6320: xmlExpNodePtr *table;
! 6321: int size;
! 6322: int nbElems;
! 6323: int nb_nodes;
! 6324: int maxNodes;
! 6325: const char *expr;
! 6326: const char *cur;
! 6327: int nb_cons;
! 6328: int tabSize;
! 6329: };
! 6330:
! 6331: /**
! 6332: * xmlExpNewCtxt:
! 6333: * @maxNodes: the maximum number of nodes
! 6334: * @dict: optional dictionnary to use internally
! 6335: *
! 6336: * Creates a new context for manipulating expressions
! 6337: *
! 6338: * Returns the context or NULL in case of error
! 6339: */
! 6340: xmlExpCtxtPtr
! 6341: xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
! 6342: xmlExpCtxtPtr ret;
! 6343: int size = 256;
! 6344:
! 6345: if (maxNodes <= 4096)
! 6346: maxNodes = 4096;
! 6347:
! 6348: ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
! 6349: if (ret == NULL)
! 6350: return(NULL);
! 6351: memset(ret, 0, sizeof(xmlExpCtxt));
! 6352: ret->size = size;
! 6353: ret->nbElems = 0;
! 6354: ret->maxNodes = maxNodes;
! 6355: ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
! 6356: if (ret->table == NULL) {
! 6357: xmlFree(ret);
! 6358: return(NULL);
! 6359: }
! 6360: memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
! 6361: if (dict == NULL) {
! 6362: ret->dict = xmlDictCreate();
! 6363: if (ret->dict == NULL) {
! 6364: xmlFree(ret->table);
! 6365: xmlFree(ret);
! 6366: return(NULL);
! 6367: }
! 6368: } else {
! 6369: ret->dict = dict;
! 6370: xmlDictReference(ret->dict);
! 6371: }
! 6372: return(ret);
! 6373: }
! 6374:
! 6375: /**
! 6376: * xmlExpFreeCtxt:
! 6377: * @ctxt: an expression context
! 6378: *
! 6379: * Free an expression context
! 6380: */
! 6381: void
! 6382: xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
! 6383: if (ctxt == NULL)
! 6384: return;
! 6385: xmlDictFree(ctxt->dict);
! 6386: if (ctxt->table != NULL)
! 6387: xmlFree(ctxt->table);
! 6388: xmlFree(ctxt);
! 6389: }
! 6390:
! 6391: /************************************************************************
! 6392: * *
! 6393: * Structure associated to an expression node *
! 6394: * *
! 6395: ************************************************************************/
! 6396: #define MAX_NODES 10000
! 6397:
! 6398: /* #define DEBUG_DERIV */
! 6399:
! 6400: /*
! 6401: * TODO:
! 6402: * - Wildcards
! 6403: * - public API for creation
! 6404: *
! 6405: * Started
! 6406: * - regression testing
! 6407: *
! 6408: * Done
! 6409: * - split into module and test tool
! 6410: * - memleaks
! 6411: */
! 6412:
! 6413: typedef enum {
! 6414: XML_EXP_NILABLE = (1 << 0)
! 6415: } xmlExpNodeInfo;
! 6416:
! 6417: #define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
! 6418:
! 6419: struct _xmlExpNode {
! 6420: unsigned char type;/* xmlExpNodeType */
! 6421: unsigned char info;/* OR of xmlExpNodeInfo */
! 6422: unsigned short key; /* the hash key */
! 6423: unsigned int ref; /* The number of references */
! 6424: int c_max; /* the maximum length it can consume */
! 6425: xmlExpNodePtr exp_left;
! 6426: xmlExpNodePtr next;/* the next node in the hash table or free list */
! 6427: union {
! 6428: struct {
! 6429: int f_min;
! 6430: int f_max;
! 6431: } count;
! 6432: struct {
! 6433: xmlExpNodePtr f_right;
! 6434: } children;
! 6435: const xmlChar *f_str;
! 6436: } field;
! 6437: };
! 6438:
! 6439: #define exp_min field.count.f_min
! 6440: #define exp_max field.count.f_max
! 6441: /* #define exp_left field.children.f_left */
! 6442: #define exp_right field.children.f_right
! 6443: #define exp_str field.f_str
! 6444:
! 6445: static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
! 6446: static xmlExpNode forbiddenExpNode = {
! 6447: XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
! 6448: };
! 6449: xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
! 6450: static xmlExpNode emptyExpNode = {
! 6451: XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
! 6452: };
! 6453: xmlExpNodePtr emptyExp = &emptyExpNode;
! 6454:
! 6455: /************************************************************************
! 6456: * *
! 6457: * The custom hash table for unicity and canonicalization *
! 6458: * of sub-expressions pointers *
! 6459: * *
! 6460: ************************************************************************/
! 6461: /*
! 6462: * xmlExpHashNameComputeKey:
! 6463: * Calculate the hash key for a token
! 6464: */
! 6465: static unsigned short
! 6466: xmlExpHashNameComputeKey(const xmlChar *name) {
! 6467: unsigned short value = 0L;
! 6468: char ch;
! 6469:
! 6470: if (name != NULL) {
! 6471: value += 30 * (*name);
! 6472: while ((ch = *name++) != 0) {
! 6473: value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
! 6474: }
! 6475: }
! 6476: return (value);
! 6477: }
! 6478:
! 6479: /*
! 6480: * xmlExpHashComputeKey:
! 6481: * Calculate the hash key for a compound expression
! 6482: */
! 6483: static unsigned short
! 6484: xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
! 6485: xmlExpNodePtr right) {
! 6486: unsigned long value;
! 6487: unsigned short ret;
! 6488:
! 6489: switch (type) {
! 6490: case XML_EXP_SEQ:
! 6491: value = left->key;
! 6492: value += right->key;
! 6493: value *= 3;
! 6494: ret = (unsigned short) value;
! 6495: break;
! 6496: case XML_EXP_OR:
! 6497: value = left->key;
! 6498: value += right->key;
! 6499: value *= 7;
! 6500: ret = (unsigned short) value;
! 6501: break;
! 6502: case XML_EXP_COUNT:
! 6503: value = left->key;
! 6504: value += right->key;
! 6505: ret = (unsigned short) value;
! 6506: break;
! 6507: default:
! 6508: ret = 0;
! 6509: }
! 6510: return(ret);
! 6511: }
! 6512:
! 6513:
! 6514: static xmlExpNodePtr
! 6515: xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
! 6516: xmlExpNodePtr ret;
! 6517:
! 6518: if (ctxt->nb_nodes >= MAX_NODES)
! 6519: return(NULL);
! 6520: ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
! 6521: if (ret == NULL)
! 6522: return(NULL);
! 6523: memset(ret, 0, sizeof(xmlExpNode));
! 6524: ret->type = type;
! 6525: ret->next = NULL;
! 6526: ctxt->nb_nodes++;
! 6527: ctxt->nb_cons++;
! 6528: return(ret);
! 6529: }
! 6530:
! 6531: /**
! 6532: * xmlExpHashGetEntry:
! 6533: * @table: the hash table
! 6534: *
! 6535: * Get the unique entry from the hash table. The entry is created if
! 6536: * needed. @left and @right are consumed, i.e. their ref count will
! 6537: * be decremented by the operation.
! 6538: *
! 6539: * Returns the pointer or NULL in case of error
! 6540: */
! 6541: static xmlExpNodePtr
! 6542: xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
! 6543: xmlExpNodePtr left, xmlExpNodePtr right,
! 6544: const xmlChar *name, int min, int max) {
! 6545: unsigned short kbase, key;
! 6546: xmlExpNodePtr entry;
! 6547: xmlExpNodePtr insert;
! 6548:
! 6549: if (ctxt == NULL)
! 6550: return(NULL);
! 6551:
! 6552: /*
! 6553: * Check for duplicate and insertion location.
! 6554: */
! 6555: if (type == XML_EXP_ATOM) {
! 6556: kbase = xmlExpHashNameComputeKey(name);
! 6557: } else if (type == XML_EXP_COUNT) {
! 6558: /* COUNT reduction rule 1 */
! 6559: /* a{1} -> a */
! 6560: if (min == max) {
! 6561: if (min == 1) {
! 6562: return(left);
! 6563: }
! 6564: if (min == 0) {
! 6565: xmlExpFree(ctxt, left);
! 6566: return(emptyExp);
! 6567: }
! 6568: }
! 6569: if (min < 0) {
! 6570: xmlExpFree(ctxt, left);
! 6571: return(forbiddenExp);
! 6572: }
! 6573: if (max == -1)
! 6574: kbase = min + 79;
! 6575: else
! 6576: kbase = max - min;
! 6577: kbase += left->key;
! 6578: } else if (type == XML_EXP_OR) {
! 6579: /* Forbid reduction rules */
! 6580: if (left->type == XML_EXP_FORBID) {
! 6581: xmlExpFree(ctxt, left);
! 6582: return(right);
! 6583: }
! 6584: if (right->type == XML_EXP_FORBID) {
! 6585: xmlExpFree(ctxt, right);
! 6586: return(left);
! 6587: }
! 6588:
! 6589: /* OR reduction rule 1 */
! 6590: /* a | a reduced to a */
! 6591: if (left == right) {
! 6592: left->ref--;
! 6593: return(left);
! 6594: }
! 6595: /* OR canonicalization rule 1 */
! 6596: /* linearize (a | b) | c into a | (b | c) */
! 6597: if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
! 6598: xmlExpNodePtr tmp = left;
! 6599: left = right;
! 6600: right = tmp;
! 6601: }
! 6602: /* OR reduction rule 2 */
! 6603: /* a | (a | b) and b | (a | b) are reduced to a | b */
! 6604: if (right->type == XML_EXP_OR) {
! 6605: if ((left == right->exp_left) ||
! 6606: (left == right->exp_right)) {
! 6607: xmlExpFree(ctxt, left);
! 6608: return(right);
! 6609: }
! 6610: }
! 6611: /* OR canonicalization rule 2 */
! 6612: /* linearize (a | b) | c into a | (b | c) */
! 6613: if (left->type == XML_EXP_OR) {
! 6614: xmlExpNodePtr tmp;
! 6615:
! 6616: /* OR canonicalization rule 2 */
! 6617: if ((left->exp_right->type != XML_EXP_OR) &&
! 6618: (left->exp_right->key < left->exp_left->key)) {
! 6619: tmp = left->exp_right;
! 6620: left->exp_right = left->exp_left;
! 6621: left->exp_left = tmp;
! 6622: }
! 6623: left->exp_right->ref++;
! 6624: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
! 6625: NULL, 0, 0);
! 6626: left->exp_left->ref++;
! 6627: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
! 6628: NULL, 0, 0);
! 6629:
! 6630: xmlExpFree(ctxt, left);
! 6631: return(tmp);
! 6632: }
! 6633: if (right->type == XML_EXP_OR) {
! 6634: /* Ordering in the tree */
! 6635: /* C | (A | B) -> A | (B | C) */
! 6636: if (left->key > right->exp_right->key) {
! 6637: xmlExpNodePtr tmp;
! 6638: right->exp_right->ref++;
! 6639: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
! 6640: left, NULL, 0, 0);
! 6641: right->exp_left->ref++;
! 6642: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
! 6643: tmp, NULL, 0, 0);
! 6644: xmlExpFree(ctxt, right);
! 6645: return(tmp);
! 6646: }
! 6647: /* Ordering in the tree */
! 6648: /* B | (A | C) -> A | (B | C) */
! 6649: if (left->key > right->exp_left->key) {
! 6650: xmlExpNodePtr tmp;
! 6651: right->exp_right->ref++;
! 6652: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
! 6653: right->exp_right, NULL, 0, 0);
! 6654: right->exp_left->ref++;
! 6655: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
! 6656: tmp, NULL, 0, 0);
! 6657: xmlExpFree(ctxt, right);
! 6658: return(tmp);
! 6659: }
! 6660: }
! 6661: /* we know both types are != XML_EXP_OR here */
! 6662: else if (left->key > right->key) {
! 6663: xmlExpNodePtr tmp = left;
! 6664: left = right;
! 6665: right = tmp;
! 6666: }
! 6667: kbase = xmlExpHashComputeKey(type, left, right);
! 6668: } else if (type == XML_EXP_SEQ) {
! 6669: /* Forbid reduction rules */
! 6670: if (left->type == XML_EXP_FORBID) {
! 6671: xmlExpFree(ctxt, right);
! 6672: return(left);
! 6673: }
! 6674: if (right->type == XML_EXP_FORBID) {
! 6675: xmlExpFree(ctxt, left);
! 6676: return(right);
! 6677: }
! 6678: /* Empty reduction rules */
! 6679: if (right->type == XML_EXP_EMPTY) {
! 6680: return(left);
! 6681: }
! 6682: if (left->type == XML_EXP_EMPTY) {
! 6683: return(right);
! 6684: }
! 6685: kbase = xmlExpHashComputeKey(type, left, right);
! 6686: } else
! 6687: return(NULL);
! 6688:
! 6689: key = kbase % ctxt->size;
! 6690: if (ctxt->table[key] != NULL) {
! 6691: for (insert = ctxt->table[key]; insert != NULL;
! 6692: insert = insert->next) {
! 6693: if ((insert->key == kbase) &&
! 6694: (insert->type == type)) {
! 6695: if (type == XML_EXP_ATOM) {
! 6696: if (name == insert->exp_str) {
! 6697: insert->ref++;
! 6698: return(insert);
! 6699: }
! 6700: } else if (type == XML_EXP_COUNT) {
! 6701: if ((insert->exp_min == min) && (insert->exp_max == max) &&
! 6702: (insert->exp_left == left)) {
! 6703: insert->ref++;
! 6704: left->ref--;
! 6705: return(insert);
! 6706: }
! 6707: } else if ((insert->exp_left == left) &&
! 6708: (insert->exp_right == right)) {
! 6709: insert->ref++;
! 6710: left->ref--;
! 6711: right->ref--;
! 6712: return(insert);
! 6713: }
! 6714: }
! 6715: }
! 6716: }
! 6717:
! 6718: entry = xmlExpNewNode(ctxt, type);
! 6719: if (entry == NULL)
! 6720: return(NULL);
! 6721: entry->key = kbase;
! 6722: if (type == XML_EXP_ATOM) {
! 6723: entry->exp_str = name;
! 6724: entry->c_max = 1;
! 6725: } else if (type == XML_EXP_COUNT) {
! 6726: entry->exp_min = min;
! 6727: entry->exp_max = max;
! 6728: entry->exp_left = left;
! 6729: if ((min == 0) || (IS_NILLABLE(left)))
! 6730: entry->info |= XML_EXP_NILABLE;
! 6731: if (max < 0)
! 6732: entry->c_max = -1;
! 6733: else
! 6734: entry->c_max = max * entry->exp_left->c_max;
! 6735: } else {
! 6736: entry->exp_left = left;
! 6737: entry->exp_right = right;
! 6738: if (type == XML_EXP_OR) {
! 6739: if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
! 6740: entry->info |= XML_EXP_NILABLE;
! 6741: if ((entry->exp_left->c_max == -1) ||
! 6742: (entry->exp_right->c_max == -1))
! 6743: entry->c_max = -1;
! 6744: else if (entry->exp_left->c_max > entry->exp_right->c_max)
! 6745: entry->c_max = entry->exp_left->c_max;
! 6746: else
! 6747: entry->c_max = entry->exp_right->c_max;
! 6748: } else {
! 6749: if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
! 6750: entry->info |= XML_EXP_NILABLE;
! 6751: if ((entry->exp_left->c_max == -1) ||
! 6752: (entry->exp_right->c_max == -1))
! 6753: entry->c_max = -1;
! 6754: else
! 6755: entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
! 6756: }
! 6757: }
! 6758: entry->ref = 1;
! 6759: if (ctxt->table[key] != NULL)
! 6760: entry->next = ctxt->table[key];
! 6761:
! 6762: ctxt->table[key] = entry;
! 6763: ctxt->nbElems++;
! 6764:
! 6765: return(entry);
! 6766: }
! 6767:
! 6768: /**
! 6769: * xmlExpFree:
! 6770: * @ctxt: the expression context
! 6771: * @exp: the expression
! 6772: *
! 6773: * Dereference the expression
! 6774: */
! 6775: void
! 6776: xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
! 6777: if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
! 6778: return;
! 6779: exp->ref--;
! 6780: if (exp->ref == 0) {
! 6781: unsigned short key;
! 6782:
! 6783: /* Unlink it first from the hash table */
! 6784: key = exp->key % ctxt->size;
! 6785: if (ctxt->table[key] == exp) {
! 6786: ctxt->table[key] = exp->next;
! 6787: } else {
! 6788: xmlExpNodePtr tmp;
! 6789:
! 6790: tmp = ctxt->table[key];
! 6791: while (tmp != NULL) {
! 6792: if (tmp->next == exp) {
! 6793: tmp->next = exp->next;
! 6794: break;
! 6795: }
! 6796: tmp = tmp->next;
! 6797: }
! 6798: }
! 6799:
! 6800: if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
! 6801: xmlExpFree(ctxt, exp->exp_left);
! 6802: xmlExpFree(ctxt, exp->exp_right);
! 6803: } else if (exp->type == XML_EXP_COUNT) {
! 6804: xmlExpFree(ctxt, exp->exp_left);
! 6805: }
! 6806: xmlFree(exp);
! 6807: ctxt->nb_nodes--;
! 6808: }
! 6809: }
! 6810:
! 6811: /**
! 6812: * xmlExpRef:
! 6813: * @exp: the expression
! 6814: *
! 6815: * Increase the reference count of the expression
! 6816: */
! 6817: void
! 6818: xmlExpRef(xmlExpNodePtr exp) {
! 6819: if (exp != NULL)
! 6820: exp->ref++;
! 6821: }
! 6822:
! 6823: /**
! 6824: * xmlExpNewAtom:
! 6825: * @ctxt: the expression context
! 6826: * @name: the atom name
! 6827: * @len: the atom name lenght in byte (or -1);
! 6828: *
! 6829: * Get the atom associated to this name from that context
! 6830: *
! 6831: * Returns the node or NULL in case of error
! 6832: */
! 6833: xmlExpNodePtr
! 6834: xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
! 6835: if ((ctxt == NULL) || (name == NULL))
! 6836: return(NULL);
! 6837: name = xmlDictLookup(ctxt->dict, name, len);
! 6838: if (name == NULL)
! 6839: return(NULL);
! 6840: return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
! 6841: }
! 6842:
! 6843: /**
! 6844: * xmlExpNewOr:
! 6845: * @ctxt: the expression context
! 6846: * @left: left expression
! 6847: * @right: right expression
! 6848: *
! 6849: * Get the atom associated to the choice @left | @right
! 6850: * Note that @left and @right are consumed in the operation, to keep
! 6851: * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
! 6852: * this is true even in case of failure (unless ctxt == NULL).
! 6853: *
! 6854: * Returns the node or NULL in case of error
! 6855: */
! 6856: xmlExpNodePtr
! 6857: xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
! 6858: if (ctxt == NULL)
! 6859: return(NULL);
! 6860: if ((left == NULL) || (right == NULL)) {
! 6861: xmlExpFree(ctxt, left);
! 6862: xmlExpFree(ctxt, right);
! 6863: return(NULL);
! 6864: }
! 6865: return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
! 6866: }
! 6867:
! 6868: /**
! 6869: * xmlExpNewSeq:
! 6870: * @ctxt: the expression context
! 6871: * @left: left expression
! 6872: * @right: right expression
! 6873: *
! 6874: * Get the atom associated to the sequence @left , @right
! 6875: * Note that @left and @right are consumed in the operation, to keep
! 6876: * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
! 6877: * this is true even in case of failure (unless ctxt == NULL).
! 6878: *
! 6879: * Returns the node or NULL in case of error
! 6880: */
! 6881: xmlExpNodePtr
! 6882: xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
! 6883: if (ctxt == NULL)
! 6884: return(NULL);
! 6885: if ((left == NULL) || (right == NULL)) {
! 6886: xmlExpFree(ctxt, left);
! 6887: xmlExpFree(ctxt, right);
! 6888: return(NULL);
! 6889: }
! 6890: return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
! 6891: }
! 6892:
! 6893: /**
! 6894: * xmlExpNewRange:
! 6895: * @ctxt: the expression context
! 6896: * @subset: the expression to be repeated
! 6897: * @min: the lower bound for the repetition
! 6898: * @max: the upper bound for the repetition, -1 means infinite
! 6899: *
! 6900: * Get the atom associated to the range (@subset){@min, @max}
! 6901: * Note that @subset is consumed in the operation, to keep
! 6902: * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
! 6903: * this is true even in case of failure (unless ctxt == NULL).
! 6904: *
! 6905: * Returns the node or NULL in case of error
! 6906: */
! 6907: xmlExpNodePtr
! 6908: xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
! 6909: if (ctxt == NULL)
! 6910: return(NULL);
! 6911: if ((subset == NULL) || (min < 0) || (max < -1) ||
! 6912: ((max >= 0) && (min > max))) {
! 6913: xmlExpFree(ctxt, subset);
! 6914: return(NULL);
! 6915: }
! 6916: return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
! 6917: NULL, NULL, min, max));
! 6918: }
! 6919:
! 6920: /************************************************************************
! 6921: * *
! 6922: * Public API for operations on expressions *
! 6923: * *
! 6924: ************************************************************************/
! 6925:
! 6926: static int
! 6927: xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 6928: const xmlChar**list, int len, int nb) {
! 6929: int tmp, tmp2;
! 6930: tail:
! 6931: switch (exp->type) {
! 6932: case XML_EXP_EMPTY:
! 6933: return(0);
! 6934: case XML_EXP_ATOM:
! 6935: for (tmp = 0;tmp < nb;tmp++)
! 6936: if (list[tmp] == exp->exp_str)
! 6937: return(0);
! 6938: if (nb >= len)
! 6939: return(-2);
! 6940: list[nb] = exp->exp_str;
! 6941: return(1);
! 6942: case XML_EXP_COUNT:
! 6943: exp = exp->exp_left;
! 6944: goto tail;
! 6945: case XML_EXP_SEQ:
! 6946: case XML_EXP_OR:
! 6947: tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
! 6948: if (tmp < 0)
! 6949: return(tmp);
! 6950: tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
! 6951: nb + tmp);
! 6952: if (tmp2 < 0)
! 6953: return(tmp2);
! 6954: return(tmp + tmp2);
! 6955: }
! 6956: return(-1);
! 6957: }
! 6958:
! 6959: /**
! 6960: * xmlExpGetLanguage:
! 6961: * @ctxt: the expression context
! 6962: * @exp: the expression
! 6963: * @langList: where to store the tokens
! 6964: * @len: the allocated lenght of @list
! 6965: *
! 6966: * Find all the strings used in @exp and store them in @list
! 6967: *
! 6968: * Returns the number of unique strings found, -1 in case of errors and
! 6969: * -2 if there is more than @len strings
! 6970: */
! 6971: int
! 6972: xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 6973: const xmlChar**langList, int len) {
! 6974: if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
! 6975: return(-1);
! 6976: return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
! 6977: }
! 6978:
! 6979: static int
! 6980: xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 6981: const xmlChar**list, int len, int nb) {
! 6982: int tmp, tmp2;
! 6983: tail:
! 6984: switch (exp->type) {
! 6985: case XML_EXP_FORBID:
! 6986: return(0);
! 6987: case XML_EXP_EMPTY:
! 6988: return(0);
! 6989: case XML_EXP_ATOM:
! 6990: for (tmp = 0;tmp < nb;tmp++)
! 6991: if (list[tmp] == exp->exp_str)
! 6992: return(0);
! 6993: if (nb >= len)
! 6994: return(-2);
! 6995: list[nb] = exp->exp_str;
! 6996: return(1);
! 6997: case XML_EXP_COUNT:
! 6998: exp = exp->exp_left;
! 6999: goto tail;
! 7000: case XML_EXP_SEQ:
! 7001: tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
! 7002: if (tmp < 0)
! 7003: return(tmp);
! 7004: if (IS_NILLABLE(exp->exp_left)) {
! 7005: tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
! 7006: nb + tmp);
! 7007: if (tmp2 < 0)
! 7008: return(tmp2);
! 7009: tmp += tmp2;
! 7010: }
! 7011: return(tmp);
! 7012: case XML_EXP_OR:
! 7013: tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
! 7014: if (tmp < 0)
! 7015: return(tmp);
! 7016: tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
! 7017: nb + tmp);
! 7018: if (tmp2 < 0)
! 7019: return(tmp2);
! 7020: return(tmp + tmp2);
! 7021: }
! 7022: return(-1);
! 7023: }
! 7024:
! 7025: /**
! 7026: * xmlExpGetStart:
! 7027: * @ctxt: the expression context
! 7028: * @exp: the expression
! 7029: * @tokList: where to store the tokens
! 7030: * @len: the allocated lenght of @list
! 7031: *
! 7032: * Find all the strings that appears at the start of the languages
! 7033: * accepted by @exp and store them in @list. E.g. for (a, b) | c
! 7034: * it will return the list [a, c]
! 7035: *
! 7036: * Returns the number of unique strings found, -1 in case of errors and
! 7037: * -2 if there is more than @len strings
! 7038: */
! 7039: int
! 7040: xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 7041: const xmlChar**tokList, int len) {
! 7042: if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
! 7043: return(-1);
! 7044: return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
! 7045: }
! 7046:
! 7047: /**
! 7048: * xmlExpIsNillable:
! 7049: * @exp: the expression
! 7050: *
! 7051: * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
! 7052: *
! 7053: * Returns 1 if nillable, 0 if not and -1 in case of error
! 7054: */
! 7055: int
! 7056: xmlExpIsNillable(xmlExpNodePtr exp) {
! 7057: if (exp == NULL)
! 7058: return(-1);
! 7059: return(IS_NILLABLE(exp) != 0);
! 7060: }
! 7061:
! 7062: static xmlExpNodePtr
! 7063: xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
! 7064: {
! 7065: xmlExpNodePtr ret;
! 7066:
! 7067: switch (exp->type) {
! 7068: case XML_EXP_EMPTY:
! 7069: return(forbiddenExp);
! 7070: case XML_EXP_FORBID:
! 7071: return(forbiddenExp);
! 7072: case XML_EXP_ATOM:
! 7073: if (exp->exp_str == str) {
! 7074: #ifdef DEBUG_DERIV
! 7075: printf("deriv atom: equal => Empty\n");
! 7076: #endif
! 7077: ret = emptyExp;
! 7078: } else {
! 7079: #ifdef DEBUG_DERIV
! 7080: printf("deriv atom: mismatch => forbid\n");
! 7081: #endif
! 7082: /* TODO wildcards here */
! 7083: ret = forbiddenExp;
! 7084: }
! 7085: return(ret);
! 7086: case XML_EXP_OR: {
! 7087: xmlExpNodePtr tmp;
! 7088:
! 7089: #ifdef DEBUG_DERIV
! 7090: printf("deriv or: => or(derivs)\n");
! 7091: #endif
! 7092: tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
! 7093: if (tmp == NULL) {
! 7094: return(NULL);
! 7095: }
! 7096: ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
! 7097: if (ret == NULL) {
! 7098: xmlExpFree(ctxt, tmp);
! 7099: return(NULL);
! 7100: }
! 7101: ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
! 7102: NULL, 0, 0);
! 7103: return(ret);
! 7104: }
! 7105: case XML_EXP_SEQ:
! 7106: #ifdef DEBUG_DERIV
! 7107: printf("deriv seq: starting with left\n");
! 7108: #endif
! 7109: ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
! 7110: if (ret == NULL) {
! 7111: return(NULL);
! 7112: } else if (ret == forbiddenExp) {
! 7113: if (IS_NILLABLE(exp->exp_left)) {
! 7114: #ifdef DEBUG_DERIV
! 7115: printf("deriv seq: left failed but nillable\n");
! 7116: #endif
! 7117: ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
! 7118: }
! 7119: } else {
! 7120: #ifdef DEBUG_DERIV
! 7121: printf("deriv seq: left match => sequence\n");
! 7122: #endif
! 7123: exp->exp_right->ref++;
! 7124: ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
! 7125: NULL, 0, 0);
! 7126: }
! 7127: return(ret);
! 7128: case XML_EXP_COUNT: {
! 7129: int min, max;
! 7130: xmlExpNodePtr tmp;
! 7131:
! 7132: if (exp->exp_max == 0)
! 7133: return(forbiddenExp);
! 7134: ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
! 7135: if (ret == NULL)
! 7136: return(NULL);
! 7137: if (ret == forbiddenExp) {
! 7138: #ifdef DEBUG_DERIV
! 7139: printf("deriv count: pattern mismatch => forbid\n");
! 7140: #endif
! 7141: return(ret);
! 7142: }
! 7143: if (exp->exp_max == 1)
! 7144: return(ret);
! 7145: if (exp->exp_max < 0) /* unbounded */
! 7146: max = -1;
! 7147: else
! 7148: max = exp->exp_max - 1;
! 7149: if (exp->exp_min > 0)
! 7150: min = exp->exp_min - 1;
! 7151: else
! 7152: min = 0;
! 7153: exp->exp_left->ref++;
! 7154: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
! 7155: NULL, min, max);
! 7156: if (ret == emptyExp) {
! 7157: #ifdef DEBUG_DERIV
! 7158: printf("deriv count: match to empty => new count\n");
! 7159: #endif
! 7160: return(tmp);
! 7161: }
! 7162: #ifdef DEBUG_DERIV
! 7163: printf("deriv count: match => sequence with new count\n");
! 7164: #endif
! 7165: return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
! 7166: NULL, 0, 0));
! 7167: }
! 7168: }
! 7169: return(NULL);
! 7170: }
! 7171:
! 7172: /**
! 7173: * xmlExpStringDerive:
! 7174: * @ctxt: the expression context
! 7175: * @exp: the expression
! 7176: * @str: the string
! 7177: * @len: the string len in bytes if available
! 7178: *
! 7179: * Do one step of Brzozowski derivation of the expression @exp with
! 7180: * respect to the input string
! 7181: *
! 7182: * Returns the resulting expression or NULL in case of internal error
! 7183: */
! 7184: xmlExpNodePtr
! 7185: xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 7186: const xmlChar *str, int len) {
! 7187: const xmlChar *input;
! 7188:
! 7189: if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
! 7190: return(NULL);
! 7191: }
! 7192: /*
! 7193: * check the string is in the dictionnary, if yes use an interned
! 7194: * copy, otherwise we know it's not an acceptable input
! 7195: */
! 7196: input = xmlDictExists(ctxt->dict, str, len);
! 7197: if (input == NULL) {
! 7198: return(forbiddenExp);
! 7199: }
! 7200: return(xmlExpStringDeriveInt(ctxt, exp, input));
! 7201: }
! 7202:
! 7203: static int
! 7204: xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
! 7205: int ret = 1;
! 7206:
! 7207: if (sub->c_max == -1) {
! 7208: if (exp->c_max != -1)
! 7209: ret = 0;
! 7210: } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
! 7211: ret = 0;
! 7212: }
! 7213: #if 0
! 7214: if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
! 7215: ret = 0;
! 7216: #endif
! 7217: return(ret);
! 7218: }
! 7219:
! 7220: static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
! 7221: xmlExpNodePtr sub);
! 7222: /**
! 7223: * xmlExpDivide:
! 7224: * @ctxt: the expressions context
! 7225: * @exp: the englobing expression
! 7226: * @sub: the subexpression
! 7227: * @mult: the multiple expression
! 7228: * @remain: the remain from the derivation of the multiple
! 7229: *
! 7230: * Check if exp is a multiple of sub, i.e. if there is a finite number n
! 7231: * so that sub{n} subsume exp
! 7232: *
! 7233: * Returns the multiple value if successful, 0 if it is not a multiple
! 7234: * and -1 in case of internel error.
! 7235: */
! 7236:
! 7237: static int
! 7238: xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
! 7239: xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
! 7240: int i;
! 7241: xmlExpNodePtr tmp, tmp2;
! 7242:
! 7243: if (mult != NULL) *mult = NULL;
! 7244: if (remain != NULL) *remain = NULL;
! 7245: if (exp->c_max == -1) return(0);
! 7246: if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
! 7247:
! 7248: for (i = 1;i <= exp->c_max;i++) {
! 7249: sub->ref++;
! 7250: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
! 7251: sub, NULL, NULL, i, i);
! 7252: if (tmp == NULL) {
! 7253: return(-1);
! 7254: }
! 7255: if (!xmlExpCheckCard(tmp, exp)) {
! 7256: xmlExpFree(ctxt, tmp);
! 7257: continue;
! 7258: }
! 7259: tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
! 7260: if (tmp2 == NULL) {
! 7261: xmlExpFree(ctxt, tmp);
! 7262: return(-1);
! 7263: }
! 7264: if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
! 7265: if (remain != NULL)
! 7266: *remain = tmp2;
! 7267: else
! 7268: xmlExpFree(ctxt, tmp2);
! 7269: if (mult != NULL)
! 7270: *mult = tmp;
! 7271: else
! 7272: xmlExpFree(ctxt, tmp);
! 7273: #ifdef DEBUG_DERIV
! 7274: printf("Divide succeeded %d\n", i);
! 7275: #endif
! 7276: return(i);
! 7277: }
! 7278: xmlExpFree(ctxt, tmp);
! 7279: xmlExpFree(ctxt, tmp2);
! 7280: }
! 7281: #ifdef DEBUG_DERIV
! 7282: printf("Divide failed\n");
! 7283: #endif
! 7284: return(0);
! 7285: }
! 7286:
! 7287: /**
! 7288: * xmlExpExpDeriveInt:
! 7289: * @ctxt: the expressions context
! 7290: * @exp: the englobing expression
! 7291: * @sub: the subexpression
! 7292: *
! 7293: * Try to do a step of Brzozowski derivation but at a higher level
! 7294: * the input being a subexpression.
! 7295: *
! 7296: * Returns the resulting expression or NULL in case of internal error
! 7297: */
! 7298: static xmlExpNodePtr
! 7299: xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
! 7300: xmlExpNodePtr ret, tmp, tmp2, tmp3;
! 7301: const xmlChar **tab;
! 7302: int len, i;
! 7303:
! 7304: /*
! 7305: * In case of equality and if the expression can only consume a finite
! 7306: * amount, then the derivation is empty
! 7307: */
! 7308: if ((exp == sub) && (exp->c_max >= 0)) {
! 7309: #ifdef DEBUG_DERIV
! 7310: printf("Equal(exp, sub) and finite -> Empty\n");
! 7311: #endif
! 7312: return(emptyExp);
! 7313: }
! 7314: /*
! 7315: * decompose sub sequence first
! 7316: */
! 7317: if (sub->type == XML_EXP_EMPTY) {
! 7318: #ifdef DEBUG_DERIV
! 7319: printf("Empty(sub) -> Empty\n");
! 7320: #endif
! 7321: exp->ref++;
! 7322: return(exp);
! 7323: }
! 7324: if (sub->type == XML_EXP_SEQ) {
! 7325: #ifdef DEBUG_DERIV
! 7326: printf("Seq(sub) -> decompose\n");
! 7327: #endif
! 7328: tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
! 7329: if (tmp == NULL)
! 7330: return(NULL);
! 7331: if (tmp == forbiddenExp)
! 7332: return(tmp);
! 7333: ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
! 7334: xmlExpFree(ctxt, tmp);
! 7335: return(ret);
! 7336: }
! 7337: if (sub->type == XML_EXP_OR) {
! 7338: #ifdef DEBUG_DERIV
! 7339: printf("Or(sub) -> decompose\n");
! 7340: #endif
! 7341: tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
! 7342: if (tmp == forbiddenExp)
! 7343: return(tmp);
! 7344: if (tmp == NULL)
! 7345: return(NULL);
! 7346: ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
! 7347: if ((ret == NULL) || (ret == forbiddenExp)) {
! 7348: xmlExpFree(ctxt, tmp);
! 7349: return(ret);
! 7350: }
! 7351: return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
! 7352: }
! 7353: if (!xmlExpCheckCard(exp, sub)) {
! 7354: #ifdef DEBUG_DERIV
! 7355: printf("CheckCard(exp, sub) failed -> Forbid\n");
! 7356: #endif
! 7357: return(forbiddenExp);
! 7358: }
! 7359: switch (exp->type) {
! 7360: case XML_EXP_EMPTY:
! 7361: if (sub == emptyExp)
! 7362: return(emptyExp);
! 7363: #ifdef DEBUG_DERIV
! 7364: printf("Empty(exp) -> Forbid\n");
! 7365: #endif
! 7366: return(forbiddenExp);
! 7367: case XML_EXP_FORBID:
! 7368: #ifdef DEBUG_DERIV
! 7369: printf("Forbid(exp) -> Forbid\n");
! 7370: #endif
! 7371: return(forbiddenExp);
! 7372: case XML_EXP_ATOM:
! 7373: if (sub->type == XML_EXP_ATOM) {
! 7374: /* TODO: handle wildcards */
! 7375: if (exp->exp_str == sub->exp_str) {
! 7376: #ifdef DEBUG_DERIV
! 7377: printf("Atom match -> Empty\n");
! 7378: #endif
! 7379: return(emptyExp);
! 7380: }
! 7381: #ifdef DEBUG_DERIV
! 7382: printf("Atom mismatch -> Forbid\n");
! 7383: #endif
! 7384: return(forbiddenExp);
! 7385: }
! 7386: if ((sub->type == XML_EXP_COUNT) &&
! 7387: (sub->exp_max == 1) &&
! 7388: (sub->exp_left->type == XML_EXP_ATOM)) {
! 7389: /* TODO: handle wildcards */
! 7390: if (exp->exp_str == sub->exp_left->exp_str) {
! 7391: #ifdef DEBUG_DERIV
! 7392: printf("Atom match -> Empty\n");
! 7393: #endif
! 7394: return(emptyExp);
! 7395: }
! 7396: #ifdef DEBUG_DERIV
! 7397: printf("Atom mismatch -> Forbid\n");
! 7398: #endif
! 7399: return(forbiddenExp);
! 7400: }
! 7401: #ifdef DEBUG_DERIV
! 7402: printf("Compex exp vs Atom -> Forbid\n");
! 7403: #endif
! 7404: return(forbiddenExp);
! 7405: case XML_EXP_SEQ:
! 7406: /* try to get the sequence consumed only if possible */
! 7407: if (xmlExpCheckCard(exp->exp_left, sub)) {
! 7408: /* See if the sequence can be consumed directly */
! 7409: #ifdef DEBUG_DERIV
! 7410: printf("Seq trying left only\n");
! 7411: #endif
! 7412: ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
! 7413: if ((ret != forbiddenExp) && (ret != NULL)) {
! 7414: #ifdef DEBUG_DERIV
! 7415: printf("Seq trying left only worked\n");
! 7416: #endif
! 7417: /*
! 7418: * TODO: assumption here that we are determinist
! 7419: * i.e. we won't get to a nillable exp left
! 7420: * subset which could be matched by the right
! 7421: * part too.
! 7422: * e.g.: (a | b)+,(a | c) and 'a+,a'
! 7423: */
! 7424: exp->exp_right->ref++;
! 7425: return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
! 7426: exp->exp_right, NULL, 0, 0));
! 7427: }
! 7428: #ifdef DEBUG_DERIV
! 7429: } else {
! 7430: printf("Seq: left too short\n");
! 7431: #endif
! 7432: }
! 7433: /* Try instead to decompose */
! 7434: if (sub->type == XML_EXP_COUNT) {
! 7435: int min, max;
! 7436:
! 7437: #ifdef DEBUG_DERIV
! 7438: printf("Seq: sub is a count\n");
! 7439: #endif
! 7440: ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
! 7441: if (ret == NULL)
! 7442: return(NULL);
! 7443: if (ret != forbiddenExp) {
! 7444: #ifdef DEBUG_DERIV
! 7445: printf("Seq , Count match on left\n");
! 7446: #endif
! 7447: if (sub->exp_max < 0)
! 7448: max = -1;
! 7449: else
! 7450: max = sub->exp_max -1;
! 7451: if (sub->exp_min > 0)
! 7452: min = sub->exp_min -1;
! 7453: else
! 7454: min = 0;
! 7455: exp->exp_right->ref++;
! 7456: tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
! 7457: exp->exp_right, NULL, 0, 0);
! 7458: if (tmp == NULL)
! 7459: return(NULL);
! 7460:
! 7461: sub->exp_left->ref++;
! 7462: tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
! 7463: sub->exp_left, NULL, NULL, min, max);
! 7464: if (tmp2 == NULL) {
! 7465: xmlExpFree(ctxt, tmp);
! 7466: return(NULL);
! 7467: }
! 7468: ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
! 7469: xmlExpFree(ctxt, tmp);
! 7470: xmlExpFree(ctxt, tmp2);
! 7471: return(ret);
! 7472: }
! 7473: }
! 7474: /* we made no progress on structured operations */
! 7475: break;
! 7476: case XML_EXP_OR:
! 7477: #ifdef DEBUG_DERIV
! 7478: printf("Or , trying both side\n");
! 7479: #endif
! 7480: ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
! 7481: if (ret == NULL)
! 7482: return(NULL);
! 7483: tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
! 7484: if (tmp == NULL) {
! 7485: xmlExpFree(ctxt, ret);
! 7486: return(NULL);
! 7487: }
! 7488: return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
! 7489: case XML_EXP_COUNT: {
! 7490: int min, max;
! 7491:
! 7492: if (sub->type == XML_EXP_COUNT) {
! 7493: /*
! 7494: * Try to see if the loop is completely subsumed
! 7495: */
! 7496: tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
! 7497: if (tmp == NULL)
! 7498: return(NULL);
! 7499: if (tmp == forbiddenExp) {
! 7500: int mult;
! 7501:
! 7502: #ifdef DEBUG_DERIV
! 7503: printf("Count, Count inner don't subsume\n");
! 7504: #endif
! 7505: mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
! 7506: NULL, &tmp);
! 7507: if (mult <= 0) {
! 7508: #ifdef DEBUG_DERIV
! 7509: printf("Count, Count not multiple => forbidden\n");
! 7510: #endif
! 7511: return(forbiddenExp);
! 7512: }
! 7513: if (sub->exp_max == -1) {
! 7514: max = -1;
! 7515: if (exp->exp_max == -1) {
! 7516: if (exp->exp_min <= sub->exp_min * mult)
! 7517: min = 0;
! 7518: else
! 7519: min = exp->exp_min - sub->exp_min * mult;
! 7520: } else {
! 7521: #ifdef DEBUG_DERIV
! 7522: printf("Count, Count finite can't subsume infinite\n");
! 7523: #endif
! 7524: xmlExpFree(ctxt, tmp);
! 7525: return(forbiddenExp);
! 7526: }
! 7527: } else {
! 7528: if (exp->exp_max == -1) {
! 7529: #ifdef DEBUG_DERIV
! 7530: printf("Infinite loop consume mult finite loop\n");
! 7531: #endif
! 7532: if (exp->exp_min > sub->exp_min * mult) {
! 7533: max = -1;
! 7534: min = exp->exp_min - sub->exp_min * mult;
! 7535: } else {
! 7536: max = -1;
! 7537: min = 0;
! 7538: }
! 7539: } else {
! 7540: if (exp->exp_max < sub->exp_max * mult) {
! 7541: #ifdef DEBUG_DERIV
! 7542: printf("loops max mult mismatch => forbidden\n");
! 7543: #endif
! 7544: xmlExpFree(ctxt, tmp);
! 7545: return(forbiddenExp);
! 7546: }
! 7547: if (sub->exp_max * mult > exp->exp_min)
! 7548: min = 0;
! 7549: else
! 7550: min = exp->exp_min - sub->exp_max * mult;
! 7551: max = exp->exp_max - sub->exp_max * mult;
! 7552: }
! 7553: }
! 7554: } else if (!IS_NILLABLE(tmp)) {
! 7555: /*
! 7556: * TODO: loop here to try to grow if working on finite
! 7557: * blocks.
! 7558: */
! 7559: #ifdef DEBUG_DERIV
! 7560: printf("Count, Count remain not nillable => forbidden\n");
! 7561: #endif
! 7562: xmlExpFree(ctxt, tmp);
! 7563: return(forbiddenExp);
! 7564: } else if (sub->exp_max == -1) {
! 7565: if (exp->exp_max == -1) {
! 7566: if (exp->exp_min <= sub->exp_min) {
! 7567: #ifdef DEBUG_DERIV
! 7568: printf("Infinite loops Okay => COUNT(0,Inf)\n");
! 7569: #endif
! 7570: max = -1;
! 7571: min = 0;
! 7572: } else {
! 7573: #ifdef DEBUG_DERIV
! 7574: printf("Infinite loops min => Count(X,Inf)\n");
! 7575: #endif
! 7576: max = -1;
! 7577: min = exp->exp_min - sub->exp_min;
! 7578: }
! 7579: } else if (exp->exp_min > sub->exp_min) {
! 7580: #ifdef DEBUG_DERIV
! 7581: printf("loops min mismatch 1 => forbidden ???\n");
! 7582: #endif
! 7583: xmlExpFree(ctxt, tmp);
! 7584: return(forbiddenExp);
! 7585: } else {
! 7586: max = -1;
! 7587: min = 0;
! 7588: }
! 7589: } else {
! 7590: if (exp->exp_max == -1) {
! 7591: #ifdef DEBUG_DERIV
! 7592: printf("Infinite loop consume finite loop\n");
! 7593: #endif
! 7594: if (exp->exp_min > sub->exp_min) {
! 7595: max = -1;
! 7596: min = exp->exp_min - sub->exp_min;
! 7597: } else {
! 7598: max = -1;
! 7599: min = 0;
! 7600: }
! 7601: } else {
! 7602: if (exp->exp_max < sub->exp_max) {
! 7603: #ifdef DEBUG_DERIV
! 7604: printf("loops max mismatch => forbidden\n");
! 7605: #endif
! 7606: xmlExpFree(ctxt, tmp);
! 7607: return(forbiddenExp);
! 7608: }
! 7609: if (sub->exp_max > exp->exp_min)
! 7610: min = 0;
! 7611: else
! 7612: min = exp->exp_min - sub->exp_max;
! 7613: max = exp->exp_max - sub->exp_max;
! 7614: }
! 7615: }
! 7616: #ifdef DEBUG_DERIV
! 7617: printf("loops match => SEQ(COUNT())\n");
! 7618: #endif
! 7619: exp->exp_left->ref++;
! 7620: tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
! 7621: NULL, NULL, min, max);
! 7622: if (tmp2 == NULL) {
! 7623: return(NULL);
! 7624: }
! 7625: ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
! 7626: NULL, 0, 0);
! 7627: return(ret);
! 7628: }
! 7629: tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
! 7630: if (tmp == NULL)
! 7631: return(NULL);
! 7632: if (tmp == forbiddenExp) {
! 7633: #ifdef DEBUG_DERIV
! 7634: printf("loop mismatch => forbidden\n");
! 7635: #endif
! 7636: return(forbiddenExp);
! 7637: }
! 7638: if (exp->exp_min > 0)
! 7639: min = exp->exp_min - 1;
! 7640: else
! 7641: min = 0;
! 7642: if (exp->exp_max < 0)
! 7643: max = -1;
! 7644: else
! 7645: max = exp->exp_max - 1;
! 7646:
! 7647: #ifdef DEBUG_DERIV
! 7648: printf("loop match => SEQ(COUNT())\n");
! 7649: #endif
! 7650: exp->exp_left->ref++;
! 7651: tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
! 7652: NULL, NULL, min, max);
! 7653: if (tmp2 == NULL)
! 7654: return(NULL);
! 7655: ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
! 7656: NULL, 0, 0);
! 7657: return(ret);
! 7658: }
! 7659: }
! 7660:
! 7661: #ifdef DEBUG_DERIV
! 7662: printf("Fallback to derivative\n");
! 7663: #endif
! 7664: if (IS_NILLABLE(sub)) {
! 7665: if (!(IS_NILLABLE(exp)))
! 7666: return(forbiddenExp);
! 7667: else
! 7668: ret = emptyExp;
! 7669: } else
! 7670: ret = NULL;
! 7671: /*
! 7672: * here the structured derivation made no progress so
! 7673: * we use the default token based derivation to force one more step
! 7674: */
! 7675: if (ctxt->tabSize == 0)
! 7676: ctxt->tabSize = 40;
! 7677:
! 7678: tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
! 7679: sizeof(const xmlChar *));
! 7680: if (tab == NULL) {
! 7681: return(NULL);
! 7682: }
! 7683:
! 7684: /*
! 7685: * collect all the strings accepted by the subexpression on input
! 7686: */
! 7687: len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
! 7688: while (len < 0) {
! 7689: const xmlChar **temp;
! 7690: temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
! 7691: sizeof(const xmlChar *));
! 7692: if (temp == NULL) {
! 7693: xmlFree((xmlChar **) tab);
! 7694: return(NULL);
! 7695: }
! 7696: tab = temp;
! 7697: ctxt->tabSize *= 2;
! 7698: len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
! 7699: }
! 7700: for (i = 0;i < len;i++) {
! 7701: tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
! 7702: if ((tmp == NULL) || (tmp == forbiddenExp)) {
! 7703: xmlExpFree(ctxt, ret);
! 7704: xmlFree((xmlChar **) tab);
! 7705: return(tmp);
! 7706: }
! 7707: tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
! 7708: if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
! 7709: xmlExpFree(ctxt, tmp);
! 7710: xmlExpFree(ctxt, ret);
! 7711: xmlFree((xmlChar **) tab);
! 7712: return(tmp);
! 7713: }
! 7714: tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
! 7715: xmlExpFree(ctxt, tmp);
! 7716: xmlExpFree(ctxt, tmp2);
! 7717:
! 7718: if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
! 7719: xmlExpFree(ctxt, ret);
! 7720: xmlFree((xmlChar **) tab);
! 7721: return(tmp3);
! 7722: }
! 7723:
! 7724: if (ret == NULL)
! 7725: ret = tmp3;
! 7726: else {
! 7727: ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
! 7728: if (ret == NULL) {
! 7729: xmlFree((xmlChar **) tab);
! 7730: return(NULL);
! 7731: }
! 7732: }
! 7733: }
! 7734: xmlFree((xmlChar **) tab);
! 7735: return(ret);
! 7736: }
! 7737:
! 7738: /**
! 7739: * xmlExpExpDerive:
! 7740: * @ctxt: the expressions context
! 7741: * @exp: the englobing expression
! 7742: * @sub: the subexpression
! 7743: *
! 7744: * Evaluates the expression resulting from @exp consuming a sub expression @sub
! 7745: * Based on algebraic derivation and sometimes direct Brzozowski derivation
! 7746: * it usually tatkes less than linear time and can handle expressions generating
! 7747: * infinite languages.
! 7748: *
! 7749: * Returns the resulting expression or NULL in case of internal error, the
! 7750: * result must be freed
! 7751: */
! 7752: xmlExpNodePtr
! 7753: xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
! 7754: if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
! 7755: return(NULL);
! 7756:
! 7757: /*
! 7758: * O(1) speedups
! 7759: */
! 7760: if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
! 7761: #ifdef DEBUG_DERIV
! 7762: printf("Sub nillable and not exp : can't subsume\n");
! 7763: #endif
! 7764: return(forbiddenExp);
! 7765: }
! 7766: if (xmlExpCheckCard(exp, sub) == 0) {
! 7767: #ifdef DEBUG_DERIV
! 7768: printf("sub generate longuer sequances than exp : can't subsume\n");
! 7769: #endif
! 7770: return(forbiddenExp);
! 7771: }
! 7772: return(xmlExpExpDeriveInt(ctxt, exp, sub));
! 7773: }
! 7774:
! 7775: /**
! 7776: * xmlExpSubsume:
! 7777: * @ctxt: the expressions context
! 7778: * @exp: the englobing expression
! 7779: * @sub: the subexpression
! 7780: *
! 7781: * Check whether @exp accepts all the languages accexpted by @sub
! 7782: * the input being a subexpression.
! 7783: *
! 7784: * Returns 1 if true 0 if false and -1 in case of failure.
! 7785: */
! 7786: int
! 7787: xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
! 7788: xmlExpNodePtr tmp;
! 7789:
! 7790: if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
! 7791: return(-1);
! 7792:
! 7793: /*
! 7794: * TODO: speedup by checking the language of sub is a subset of the
! 7795: * language of exp
! 7796: */
! 7797: /*
! 7798: * O(1) speedups
! 7799: */
! 7800: if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
! 7801: #ifdef DEBUG_DERIV
! 7802: printf("Sub nillable and not exp : can't subsume\n");
! 7803: #endif
! 7804: return(0);
! 7805: }
! 7806: if (xmlExpCheckCard(exp, sub) == 0) {
! 7807: #ifdef DEBUG_DERIV
! 7808: printf("sub generate longuer sequances than exp : can't subsume\n");
! 7809: #endif
! 7810: return(0);
! 7811: }
! 7812: tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
! 7813: #ifdef DEBUG_DERIV
! 7814: printf("Result derivation :\n");
! 7815: PRINT_EXP(tmp);
! 7816: #endif
! 7817: if (tmp == NULL)
! 7818: return(-1);
! 7819: if (tmp == forbiddenExp)
! 7820: return(0);
! 7821: if (tmp == emptyExp)
! 7822: return(1);
! 7823: if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
! 7824: xmlExpFree(ctxt, tmp);
! 7825: return(1);
! 7826: }
! 7827: xmlExpFree(ctxt, tmp);
! 7828: return(0);
! 7829: }
! 7830:
! 7831: /************************************************************************
! 7832: * *
! 7833: * Parsing expression *
! 7834: * *
! 7835: ************************************************************************/
! 7836:
! 7837: static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
! 7838:
! 7839: #undef CUR
! 7840: #define CUR (*ctxt->cur)
! 7841: #undef NEXT
! 7842: #define NEXT ctxt->cur++;
! 7843: #undef IS_BLANK
! 7844: #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
! 7845: #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
! 7846:
! 7847: static int
! 7848: xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
! 7849: int ret = 0;
! 7850:
! 7851: SKIP_BLANKS
! 7852: if (CUR == '*') {
! 7853: NEXT
! 7854: return(-1);
! 7855: }
! 7856: if ((CUR < '0') || (CUR > '9'))
! 7857: return(-1);
! 7858: while ((CUR >= '0') && (CUR <= '9')) {
! 7859: ret = ret * 10 + (CUR - '0');
! 7860: NEXT
! 7861: }
! 7862: return(ret);
! 7863: }
! 7864:
! 7865: static xmlExpNodePtr
! 7866: xmlExpParseOr(xmlExpCtxtPtr ctxt) {
! 7867: const char *base;
! 7868: xmlExpNodePtr ret;
! 7869: const xmlChar *val;
! 7870:
! 7871: SKIP_BLANKS
! 7872: base = ctxt->cur;
! 7873: if (*ctxt->cur == '(') {
! 7874: NEXT
! 7875: ret = xmlExpParseExpr(ctxt);
! 7876: SKIP_BLANKS
! 7877: if (*ctxt->cur != ')') {
! 7878: fprintf(stderr, "unbalanced '(' : %s\n", base);
! 7879: xmlExpFree(ctxt, ret);
! 7880: return(NULL);
! 7881: }
! 7882: NEXT;
! 7883: SKIP_BLANKS
! 7884: goto parse_quantifier;
! 7885: }
! 7886: while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
! 7887: (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
! 7888: (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
! 7889: NEXT;
! 7890: val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
! 7891: if (val == NULL)
! 7892: return(NULL);
! 7893: ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
! 7894: if (ret == NULL)
! 7895: return(NULL);
! 7896: SKIP_BLANKS
! 7897: parse_quantifier:
! 7898: if (CUR == '{') {
! 7899: int min, max;
! 7900:
! 7901: NEXT
! 7902: min = xmlExpParseNumber(ctxt);
! 7903: if (min < 0) {
! 7904: xmlExpFree(ctxt, ret);
! 7905: return(NULL);
! 7906: }
! 7907: SKIP_BLANKS
! 7908: if (CUR == ',') {
! 7909: NEXT
! 7910: max = xmlExpParseNumber(ctxt);
! 7911: SKIP_BLANKS
! 7912: } else
! 7913: max = min;
! 7914: if (CUR != '}') {
! 7915: xmlExpFree(ctxt, ret);
! 7916: return(NULL);
! 7917: }
! 7918: NEXT
! 7919: ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
! 7920: min, max);
! 7921: SKIP_BLANKS
! 7922: } else if (CUR == '?') {
! 7923: NEXT
! 7924: ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
! 7925: 0, 1);
! 7926: SKIP_BLANKS
! 7927: } else if (CUR == '+') {
! 7928: NEXT
! 7929: ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
! 7930: 1, -1);
! 7931: SKIP_BLANKS
! 7932: } else if (CUR == '*') {
! 7933: NEXT
! 7934: ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
! 7935: 0, -1);
! 7936: SKIP_BLANKS
! 7937: }
! 7938: return(ret);
! 7939: }
! 7940:
! 7941:
! 7942: static xmlExpNodePtr
! 7943: xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
! 7944: xmlExpNodePtr ret, right;
! 7945:
! 7946: ret = xmlExpParseOr(ctxt);
! 7947: SKIP_BLANKS
! 7948: while (CUR == '|') {
! 7949: NEXT
! 7950: right = xmlExpParseOr(ctxt);
! 7951: if (right == NULL) {
! 7952: xmlExpFree(ctxt, ret);
! 7953: return(NULL);
! 7954: }
! 7955: ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
! 7956: if (ret == NULL)
! 7957: return(NULL);
! 7958: }
! 7959: return(ret);
! 7960: }
! 7961:
! 7962: static xmlExpNodePtr
! 7963: xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
! 7964: xmlExpNodePtr ret, right;
! 7965:
! 7966: ret = xmlExpParseSeq(ctxt);
! 7967: SKIP_BLANKS
! 7968: while (CUR == ',') {
! 7969: NEXT
! 7970: right = xmlExpParseSeq(ctxt);
! 7971: if (right == NULL) {
! 7972: xmlExpFree(ctxt, ret);
! 7973: return(NULL);
! 7974: }
! 7975: ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
! 7976: if (ret == NULL)
! 7977: return(NULL);
! 7978: }
! 7979: return(ret);
! 7980: }
! 7981:
! 7982: /**
! 7983: * xmlExpParse:
! 7984: * @ctxt: the expressions context
! 7985: * @expr: the 0 terminated string
! 7986: *
! 7987: * Minimal parser for regexps, it understand the following constructs
! 7988: * - string terminals
! 7989: * - choice operator |
! 7990: * - sequence operator ,
! 7991: * - subexpressions (...)
! 7992: * - usual cardinality operators + * and ?
! 7993: * - finite sequences { min, max }
! 7994: * - infinite sequences { min, * }
! 7995: * There is minimal checkings made especially no checking on strings values
! 7996: *
! 7997: * Returns a new expression or NULL in case of failure
! 7998: */
! 7999: xmlExpNodePtr
! 8000: xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
! 8001: xmlExpNodePtr ret;
! 8002:
! 8003: ctxt->expr = expr;
! 8004: ctxt->cur = expr;
! 8005:
! 8006: ret = xmlExpParseExpr(ctxt);
! 8007: SKIP_BLANKS
! 8008: if (*ctxt->cur != 0) {
! 8009: xmlExpFree(ctxt, ret);
! 8010: return(NULL);
! 8011: }
! 8012: return(ret);
! 8013: }
! 8014:
! 8015: static void
! 8016: xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
! 8017: xmlExpNodePtr c;
! 8018:
! 8019: if (expr == NULL) return;
! 8020: if (glob) xmlBufferWriteChar(buf, "(");
! 8021: switch (expr->type) {
! 8022: case XML_EXP_EMPTY:
! 8023: xmlBufferWriteChar(buf, "empty");
! 8024: break;
! 8025: case XML_EXP_FORBID:
! 8026: xmlBufferWriteChar(buf, "forbidden");
! 8027: break;
! 8028: case XML_EXP_ATOM:
! 8029: xmlBufferWriteCHAR(buf, expr->exp_str);
! 8030: break;
! 8031: case XML_EXP_SEQ:
! 8032: c = expr->exp_left;
! 8033: if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
! 8034: xmlExpDumpInt(buf, c, 1);
! 8035: else
! 8036: xmlExpDumpInt(buf, c, 0);
! 8037: xmlBufferWriteChar(buf, " , ");
! 8038: c = expr->exp_right;
! 8039: if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
! 8040: xmlExpDumpInt(buf, c, 1);
! 8041: else
! 8042: xmlExpDumpInt(buf, c, 0);
! 8043: break;
! 8044: case XML_EXP_OR:
! 8045: c = expr->exp_left;
! 8046: if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
! 8047: xmlExpDumpInt(buf, c, 1);
! 8048: else
! 8049: xmlExpDumpInt(buf, c, 0);
! 8050: xmlBufferWriteChar(buf, " | ");
! 8051: c = expr->exp_right;
! 8052: if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
! 8053: xmlExpDumpInt(buf, c, 1);
! 8054: else
! 8055: xmlExpDumpInt(buf, c, 0);
! 8056: break;
! 8057: case XML_EXP_COUNT: {
! 8058: char rep[40];
! 8059:
! 8060: c = expr->exp_left;
! 8061: if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
! 8062: xmlExpDumpInt(buf, c, 1);
! 8063: else
! 8064: xmlExpDumpInt(buf, c, 0);
! 8065: if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
! 8066: rep[0] = '?';
! 8067: rep[1] = 0;
! 8068: } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
! 8069: rep[0] = '*';
! 8070: rep[1] = 0;
! 8071: } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
! 8072: rep[0] = '+';
! 8073: rep[1] = 0;
! 8074: } else if (expr->exp_max == expr->exp_min) {
! 8075: snprintf(rep, 39, "{%d}", expr->exp_min);
! 8076: } else if (expr->exp_max < 0) {
! 8077: snprintf(rep, 39, "{%d,inf}", expr->exp_min);
! 8078: } else {
! 8079: snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
! 8080: }
! 8081: rep[39] = 0;
! 8082: xmlBufferWriteChar(buf, rep);
! 8083: break;
! 8084: }
! 8085: default:
! 8086: fprintf(stderr, "Error in tree\n");
! 8087: }
! 8088: if (glob)
! 8089: xmlBufferWriteChar(buf, ")");
! 8090: }
! 8091: /**
! 8092: * xmlExpDump:
! 8093: * @buf: a buffer to receive the output
! 8094: * @expr: the compiled expression
! 8095: *
! 8096: * Serialize the expression as compiled to the buffer
! 8097: */
! 8098: void
! 8099: xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
! 8100: if ((buf == NULL) || (expr == NULL))
! 8101: return;
! 8102: xmlExpDumpInt(buf, expr, 0);
! 8103: }
! 8104:
! 8105: /**
! 8106: * xmlExpMaxToken:
! 8107: * @expr: a compiled expression
! 8108: *
! 8109: * Indicate the maximum number of input a expression can accept
! 8110: *
! 8111: * Returns the maximum length or -1 in case of error
! 8112: */
! 8113: int
! 8114: xmlExpMaxToken(xmlExpNodePtr expr) {
! 8115: if (expr == NULL)
! 8116: return(-1);
! 8117: return(expr->c_max);
! 8118: }
! 8119:
! 8120: /**
! 8121: * xmlExpCtxtNbNodes:
! 8122: * @ctxt: an expression context
! 8123: *
! 8124: * Debugging facility provides the number of allocated nodes at a that point
! 8125: *
! 8126: * Returns the number of nodes in use or -1 in case of error
! 8127: */
! 8128: int
! 8129: xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
! 8130: if (ctxt == NULL)
! 8131: return(-1);
! 8132: return(ctxt->nb_nodes);
! 8133: }
! 8134:
! 8135: /**
! 8136: * xmlExpCtxtNbCons:
! 8137: * @ctxt: an expression context
! 8138: *
! 8139: * Debugging facility provides the number of allocated nodes over lifetime
! 8140: *
! 8141: * Returns the number of nodes ever allocated or -1 in case of error
! 8142: */
! 8143: int
! 8144: xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
! 8145: if (ctxt == NULL)
! 8146: return(-1);
! 8147: return(ctxt->nb_cons);
! 8148: }
! 8149:
! 8150: #endif /* LIBXML_EXPR_ENABLED */
! 8151: #define bottom_xmlregexp
! 8152: #include "elfgcchack.h"
! 8153: #endif /* LIBXML_REGEXP_ENABLED */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>