File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / xmlregexp.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:28 2014 UTC (10 years ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD
libxml2 2.9.1

    1: /*
    2:  * regexp.c: generic and extensible Regular Expression engine
    3:  *
    4:  * Basically designed with the purpose of compiling regexps for
    5:  * the variety of validation/shemas mechanisms now available in
    6:  * XML related specifications these include:
    7:  *    - XML-1.0 DTD validation
    8:  *    - XML Schemas structure part 1
    9:  *    - XML Schemas Datatypes part 2 especially Appendix F
   10:  *    - RELAX-NG/TREX i.e. the counter proposal
   11:  *
   12:  * See Copyright for the status of this software.
   13:  *
   14:  * Daniel Veillard <veillard@redhat.com>
   15:  */
   16: 
   17: #define IN_LIBXML
   18: #include "libxml.h"
   19: 
   20: #ifdef LIBXML_REGEXP_ENABLED
   21: 
   22: /* #define DEBUG_ERR */
   23: 
   24: #include <stdio.h>
   25: #include <string.h>
   26: #ifdef HAVE_LIMITS_H
   27: #include <limits.h>
   28: #endif
   29: 
   30: #include <libxml/tree.h>
   31: #include <libxml/parserInternals.h>
   32: #include <libxml/xmlregexp.h>
   33: #include <libxml/xmlautomata.h>
   34: #include <libxml/xmlunicode.h>
   35: 
   36: #ifndef INT_MAX
   37: #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
   38: #endif
   39: 
   40: /* #define DEBUG_REGEXP_GRAPH */
   41: /* #define DEBUG_REGEXP_EXEC */
   42: /* #define DEBUG_PUSH */
   43: /* #define DEBUG_COMPACTION */
   44: 
   45: #define MAX_PUSH 10000000
   46: 
   47: #ifdef ERROR
   48: #undef ERROR
   49: #endif
   50: #define ERROR(str)							\
   51:     ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
   52:     xmlRegexpErrCompile(ctxt, str);
   53: #define NEXT ctxt->cur++
   54: #define CUR (*(ctxt->cur))
   55: #define NXT(index) (ctxt->cur[index])
   56: 
   57: #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
   58: #define NEXTL(l) ctxt->cur += l;
   59: #define XML_REG_STRING_SEPARATOR '|'
   60: /*
   61:  * Need PREV to check on a '-' within a Character Group. May only be used
   62:  * when it's guaranteed that cur is not at the beginning of ctxt->string!
   63:  */
   64: #define PREV (ctxt->cur[-1])
   65: 
   66: /**
   67:  * TODO:
   68:  *
   69:  * macro to flag unimplemented blocks
   70:  */
   71: #define TODO								\
   72:     xmlGenericError(xmlGenericErrorContext,				\
   73: 	    "Unimplemented block at %s:%d\n",				\
   74:             __FILE__, __LINE__);
   75: 
   76: /************************************************************************
   77:  *									*
   78:  *			Datatypes and structures			*
   79:  *									*
   80:  ************************************************************************/
   81: 
   82: /*
   83:  * Note: the order of the enums below is significant, do not shuffle
   84:  */
   85: typedef enum {
   86:     XML_REGEXP_EPSILON = 1,
   87:     XML_REGEXP_CHARVAL,
   88:     XML_REGEXP_RANGES,
   89:     XML_REGEXP_SUBREG,  /* used for () sub regexps */
   90:     XML_REGEXP_STRING,
   91:     XML_REGEXP_ANYCHAR, /* . */
   92:     XML_REGEXP_ANYSPACE, /* \s */
   93:     XML_REGEXP_NOTSPACE, /* \S */
   94:     XML_REGEXP_INITNAME, /* \l */
   95:     XML_REGEXP_NOTINITNAME, /* \L */
   96:     XML_REGEXP_NAMECHAR, /* \c */
   97:     XML_REGEXP_NOTNAMECHAR, /* \C */
   98:     XML_REGEXP_DECIMAL, /* \d */
   99:     XML_REGEXP_NOTDECIMAL, /* \D */
  100:     XML_REGEXP_REALCHAR, /* \w */
  101:     XML_REGEXP_NOTREALCHAR, /* \W */
  102:     XML_REGEXP_LETTER = 100,
  103:     XML_REGEXP_LETTER_UPPERCASE,
  104:     XML_REGEXP_LETTER_LOWERCASE,
  105:     XML_REGEXP_LETTER_TITLECASE,
  106:     XML_REGEXP_LETTER_MODIFIER,
  107:     XML_REGEXP_LETTER_OTHERS,
  108:     XML_REGEXP_MARK,
  109:     XML_REGEXP_MARK_NONSPACING,
  110:     XML_REGEXP_MARK_SPACECOMBINING,
  111:     XML_REGEXP_MARK_ENCLOSING,
  112:     XML_REGEXP_NUMBER,
  113:     XML_REGEXP_NUMBER_DECIMAL,
  114:     XML_REGEXP_NUMBER_LETTER,
  115:     XML_REGEXP_NUMBER_OTHERS,
  116:     XML_REGEXP_PUNCT,
  117:     XML_REGEXP_PUNCT_CONNECTOR,
  118:     XML_REGEXP_PUNCT_DASH,
  119:     XML_REGEXP_PUNCT_OPEN,
  120:     XML_REGEXP_PUNCT_CLOSE,
  121:     XML_REGEXP_PUNCT_INITQUOTE,
  122:     XML_REGEXP_PUNCT_FINQUOTE,
  123:     XML_REGEXP_PUNCT_OTHERS,
  124:     XML_REGEXP_SEPAR,
  125:     XML_REGEXP_SEPAR_SPACE,
  126:     XML_REGEXP_SEPAR_LINE,
  127:     XML_REGEXP_SEPAR_PARA,
  128:     XML_REGEXP_SYMBOL,
  129:     XML_REGEXP_SYMBOL_MATH,
  130:     XML_REGEXP_SYMBOL_CURRENCY,
  131:     XML_REGEXP_SYMBOL_MODIFIER,
  132:     XML_REGEXP_SYMBOL_OTHERS,
  133:     XML_REGEXP_OTHER,
  134:     XML_REGEXP_OTHER_CONTROL,
  135:     XML_REGEXP_OTHER_FORMAT,
  136:     XML_REGEXP_OTHER_PRIVATE,
  137:     XML_REGEXP_OTHER_NA,
  138:     XML_REGEXP_BLOCK_NAME
  139: } xmlRegAtomType;
  140: 
  141: typedef enum {
  142:     XML_REGEXP_QUANT_EPSILON = 1,
  143:     XML_REGEXP_QUANT_ONCE,
  144:     XML_REGEXP_QUANT_OPT,
  145:     XML_REGEXP_QUANT_MULT,
  146:     XML_REGEXP_QUANT_PLUS,
  147:     XML_REGEXP_QUANT_ONCEONLY,
  148:     XML_REGEXP_QUANT_ALL,
  149:     XML_REGEXP_QUANT_RANGE
  150: } xmlRegQuantType;
  151: 
  152: typedef enum {
  153:     XML_REGEXP_START_STATE = 1,
  154:     XML_REGEXP_FINAL_STATE,
  155:     XML_REGEXP_TRANS_STATE,
  156:     XML_REGEXP_SINK_STATE,
  157:     XML_REGEXP_UNREACH_STATE
  158: } xmlRegStateType;
  159: 
  160: typedef enum {
  161:     XML_REGEXP_MARK_NORMAL = 0,
  162:     XML_REGEXP_MARK_START,
  163:     XML_REGEXP_MARK_VISITED
  164: } xmlRegMarkedType;
  165: 
  166: typedef struct _xmlRegRange xmlRegRange;
  167: typedef xmlRegRange *xmlRegRangePtr;
  168: 
  169: struct _xmlRegRange {
  170:     int neg;		/* 0 normal, 1 not, 2 exclude */
  171:     xmlRegAtomType type;
  172:     int start;
  173:     int end;
  174:     xmlChar *blockName;
  175: };
  176: 
  177: typedef struct _xmlRegAtom xmlRegAtom;
  178: typedef xmlRegAtom *xmlRegAtomPtr;
  179: 
  180: typedef struct _xmlAutomataState xmlRegState;
  181: typedef xmlRegState *xmlRegStatePtr;
  182: 
  183: struct _xmlRegAtom {
  184:     int no;
  185:     xmlRegAtomType type;
  186:     xmlRegQuantType quant;
  187:     int min;
  188:     int max;
  189: 
  190:     void *valuep;
  191:     void *valuep2;
  192:     int neg;
  193:     int codepoint;
  194:     xmlRegStatePtr start;
  195:     xmlRegStatePtr start0;
  196:     xmlRegStatePtr stop;
  197:     int maxRanges;
  198:     int nbRanges;
  199:     xmlRegRangePtr *ranges;
  200:     void *data;
  201: };
  202: 
  203: typedef struct _xmlRegCounter xmlRegCounter;
  204: typedef xmlRegCounter *xmlRegCounterPtr;
  205: 
  206: struct _xmlRegCounter {
  207:     int min;
  208:     int max;
  209: };
  210: 
  211: typedef struct _xmlRegTrans xmlRegTrans;
  212: typedef xmlRegTrans *xmlRegTransPtr;
  213: 
  214: struct _xmlRegTrans {
  215:     xmlRegAtomPtr atom;
  216:     int to;
  217:     int counter;
  218:     int count;
  219:     int nd;
  220: };
  221: 
  222: struct _xmlAutomataState {
  223:     xmlRegStateType type;
  224:     xmlRegMarkedType mark;
  225:     xmlRegMarkedType markd;
  226:     xmlRegMarkedType reached;
  227:     int no;
  228:     int maxTrans;
  229:     int nbTrans;
  230:     xmlRegTrans *trans;
  231:     /*  knowing states ponting to us can speed things up */
  232:     int maxTransTo;
  233:     int nbTransTo;
  234:     int *transTo;
  235: };
  236: 
  237: typedef struct _xmlAutomata xmlRegParserCtxt;
  238: typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
  239: 
  240: #define AM_AUTOMATA_RNG 1
  241: 
  242: struct _xmlAutomata {
  243:     xmlChar *string;
  244:     xmlChar *cur;
  245: 
  246:     int error;
  247:     int neg;
  248: 
  249:     xmlRegStatePtr start;
  250:     xmlRegStatePtr end;
  251:     xmlRegStatePtr state;
  252: 
  253:     xmlRegAtomPtr atom;
  254: 
  255:     int maxAtoms;
  256:     int nbAtoms;
  257:     xmlRegAtomPtr *atoms;
  258: 
  259:     int maxStates;
  260:     int nbStates;
  261:     xmlRegStatePtr *states;
  262: 
  263:     int maxCounters;
  264:     int nbCounters;
  265:     xmlRegCounter *counters;
  266: 
  267:     int determinist;
  268:     int negs;
  269:     int flags;
  270: };
  271: 
  272: struct _xmlRegexp {
  273:     xmlChar *string;
  274:     int nbStates;
  275:     xmlRegStatePtr *states;
  276:     int nbAtoms;
  277:     xmlRegAtomPtr *atoms;
  278:     int nbCounters;
  279:     xmlRegCounter *counters;
  280:     int determinist;
  281:     int flags;
  282:     /*
  283:      * That's the compact form for determinists automatas
  284:      */
  285:     int nbstates;
  286:     int *compact;
  287:     void **transdata;
  288:     int nbstrings;
  289:     xmlChar **stringMap;
  290: };
  291: 
  292: typedef struct _xmlRegExecRollback xmlRegExecRollback;
  293: typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
  294: 
  295: struct _xmlRegExecRollback {
  296:     xmlRegStatePtr state;/* the current state */
  297:     int index;		/* the index in the input stack */
  298:     int nextbranch;	/* the next transition to explore in that state */
  299:     int *counts;	/* save the automata state if it has some */
  300: };
  301: 
  302: typedef struct _xmlRegInputToken xmlRegInputToken;
  303: typedef xmlRegInputToken *xmlRegInputTokenPtr;
  304: 
  305: struct _xmlRegInputToken {
  306:     xmlChar *value;
  307:     void *data;
  308: };
  309: 
  310: struct _xmlRegExecCtxt {
  311:     int status;		/* execution status != 0 indicate an error */
  312:     int determinist;	/* did we find an indeterministic behaviour */
  313:     xmlRegexpPtr comp;	/* the compiled regexp */
  314:     xmlRegExecCallbacks callback;
  315:     void *data;
  316: 
  317:     xmlRegStatePtr state;/* the current state */
  318:     int transno;	/* the current transition on that state */
  319:     int transcount;	/* the number of chars in char counted transitions */
  320: 
  321:     /*
  322:      * A stack of rollback states
  323:      */
  324:     int maxRollbacks;
  325:     int nbRollbacks;
  326:     xmlRegExecRollback *rollbacks;
  327: 
  328:     /*
  329:      * The state of the automata if any
  330:      */
  331:     int *counts;
  332: 
  333:     /*
  334:      * The input stack
  335:      */
  336:     int inputStackMax;
  337:     int inputStackNr;
  338:     int index;
  339:     int *charStack;
  340:     const xmlChar *inputString; /* when operating on characters */
  341:     xmlRegInputTokenPtr inputStack;/* when operating on strings */
  342: 
  343:     /*
  344:      * error handling
  345:      */
  346:     int errStateNo;		/* the error state number */
  347:     xmlRegStatePtr errState;    /* the error state */
  348:     xmlChar *errString;		/* the string raising the error */
  349:     int *errCounts;		/* counters at the error state */
  350:     int nbPush;
  351: };
  352: 
  353: #define REGEXP_ALL_COUNTER	0x123456
  354: #define REGEXP_ALL_LAX_COUNTER	0x123457
  355: 
  356: static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
  357: static void xmlRegFreeState(xmlRegStatePtr state);
  358: static void xmlRegFreeAtom(xmlRegAtomPtr atom);
  359: static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
  360: static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
  361: static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
  362:                   int neg, int start, int end, const xmlChar *blockName);
  363: 
  364: void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
  365: 
  366: /************************************************************************
  367:  *									*
  368:  *		Regexp memory error handler				*
  369:  *									*
  370:  ************************************************************************/
  371: /**
  372:  * xmlRegexpErrMemory:
  373:  * @extra:  extra information
  374:  *
  375:  * Handle an out of memory condition
  376:  */
  377: static void
  378: xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
  379: {
  380:     const char *regexp = NULL;
  381:     if (ctxt != NULL) {
  382:         regexp = (const char *) ctxt->string;
  383: 	ctxt->error = XML_ERR_NO_MEMORY;
  384:     }
  385:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  386: 		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
  387: 		    regexp, NULL, 0, 0,
  388: 		    "Memory allocation failed : %s\n", extra);
  389: }
  390: 
  391: /**
  392:  * xmlRegexpErrCompile:
  393:  * @extra:  extra information
  394:  *
  395:  * Handle a compilation failure
  396:  */
  397: static void
  398: xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  399: {
  400:     const char *regexp = NULL;
  401:     int idx = 0;
  402: 
  403:     if (ctxt != NULL) {
  404:         regexp = (const char *) ctxt->string;
  405: 	idx = ctxt->cur - ctxt->string;
  406: 	ctxt->error = XML_REGEXP_COMPILE_ERROR;
  407:     }
  408:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  409: 		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
  410: 		    regexp, NULL, idx, 0,
  411: 		    "failed to compile: %s\n", extra);
  412: }
  413: 
  414: /************************************************************************
  415:  *									*
  416:  *			Allocation/Deallocation				*
  417:  *									*
  418:  ************************************************************************/
  419: 
  420: static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
  421: /**
  422:  * xmlRegEpxFromParse:
  423:  * @ctxt:  the parser context used to build it
  424:  *
  425:  * Allocate a new regexp and fill it with the result from the parser
  426:  *
  427:  * Returns the new regexp or NULL in case of error
  428:  */
  429: static xmlRegexpPtr
  430: xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
  431:     xmlRegexpPtr ret;
  432: 
  433:     ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
  434:     if (ret == NULL) {
  435: 	xmlRegexpErrMemory(ctxt, "compiling regexp");
  436: 	return(NULL);
  437:     }
  438:     memset(ret, 0, sizeof(xmlRegexp));
  439:     ret->string = ctxt->string;
  440:     ret->nbStates = ctxt->nbStates;
  441:     ret->states = ctxt->states;
  442:     ret->nbAtoms = ctxt->nbAtoms;
  443:     ret->atoms = ctxt->atoms;
  444:     ret->nbCounters = ctxt->nbCounters;
  445:     ret->counters = ctxt->counters;
  446:     ret->determinist = ctxt->determinist;
  447:     ret->flags = ctxt->flags;
  448:     if (ret->determinist == -1) {
  449:         xmlRegexpIsDeterminist(ret);
  450:     }
  451: 
  452:     if ((ret->determinist != 0) &&
  453: 	(ret->nbCounters == 0) &&
  454: 	(ctxt->negs == 0) &&
  455: 	(ret->atoms != NULL) &&
  456: 	(ret->atoms[0] != NULL) &&
  457: 	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
  458: 	int i, j, nbstates = 0, nbatoms = 0;
  459: 	int *stateRemap;
  460: 	int *stringRemap;
  461: 	int *transitions;
  462: 	void **transdata;
  463: 	xmlChar **stringMap;
  464:         xmlChar *value;
  465: 
  466: 	/*
  467: 	 * Switch to a compact representation
  468: 	 * 1/ counting the effective number of states left
  469: 	 * 2/ counting the unique number of atoms, and check that
  470: 	 *    they are all of the string type
  471: 	 * 3/ build a table state x atom for the transitions
  472: 	 */
  473: 
  474: 	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
  475: 	if (stateRemap == NULL) {
  476: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  477: 	    xmlFree(ret);
  478: 	    return(NULL);
  479: 	}
  480: 	for (i = 0;i < ret->nbStates;i++) {
  481: 	    if (ret->states[i] != NULL) {
  482: 		stateRemap[i] = nbstates;
  483: 		nbstates++;
  484: 	    } else {
  485: 		stateRemap[i] = -1;
  486: 	    }
  487: 	}
  488: #ifdef DEBUG_COMPACTION
  489: 	printf("Final: %d states\n", nbstates);
  490: #endif
  491: 	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
  492: 	if (stringMap == NULL) {
  493: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  494: 	    xmlFree(stateRemap);
  495: 	    xmlFree(ret);
  496: 	    return(NULL);
  497: 	}
  498: 	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
  499: 	if (stringRemap == NULL) {
  500: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  501: 	    xmlFree(stringMap);
  502: 	    xmlFree(stateRemap);
  503: 	    xmlFree(ret);
  504: 	    return(NULL);
  505: 	}
  506: 	for (i = 0;i < ret->nbAtoms;i++) {
  507: 	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
  508: 		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
  509: 		value = ret->atoms[i]->valuep;
  510:                 for (j = 0;j < nbatoms;j++) {
  511: 		    if (xmlStrEqual(stringMap[j], value)) {
  512: 			stringRemap[i] = j;
  513: 			break;
  514: 		    }
  515: 		}
  516: 		if (j >= nbatoms) {
  517: 		    stringRemap[i] = nbatoms;
  518: 		    stringMap[nbatoms] = xmlStrdup(value);
  519: 		    if (stringMap[nbatoms] == NULL) {
  520: 			for (i = 0;i < nbatoms;i++)
  521: 			    xmlFree(stringMap[i]);
  522: 			xmlFree(stringRemap);
  523: 			xmlFree(stringMap);
  524: 			xmlFree(stateRemap);
  525: 			xmlFree(ret);
  526: 			return(NULL);
  527: 		    }
  528: 		    nbatoms++;
  529: 		}
  530: 	    } else {
  531: 		xmlFree(stateRemap);
  532: 		xmlFree(stringRemap);
  533: 		for (i = 0;i < nbatoms;i++)
  534: 		    xmlFree(stringMap[i]);
  535: 		xmlFree(stringMap);
  536: 		xmlFree(ret);
  537: 		return(NULL);
  538: 	    }
  539: 	}
  540: #ifdef DEBUG_COMPACTION
  541: 	printf("Final: %d atoms\n", nbatoms);
  542: #endif
  543: 	transitions = (int *) xmlMalloc((nbstates + 1) *
  544: 	                                (nbatoms + 1) * sizeof(int));
  545: 	if (transitions == NULL) {
  546: 	    xmlFree(stateRemap);
  547: 	    xmlFree(stringRemap);
  548: 	    xmlFree(stringMap);
  549: 	    xmlFree(ret);
  550: 	    return(NULL);
  551: 	}
  552: 	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
  553: 
  554: 	/*
  555: 	 * Allocate the transition table. The first entry for each
  556: 	 * state corresponds to the state type.
  557: 	 */
  558: 	transdata = NULL;
  559: 
  560: 	for (i = 0;i < ret->nbStates;i++) {
  561: 	    int stateno, atomno, targetno, prev;
  562: 	    xmlRegStatePtr state;
  563: 	    xmlRegTransPtr trans;
  564: 
  565: 	    stateno = stateRemap[i];
  566: 	    if (stateno == -1)
  567: 		continue;
  568: 	    state = ret->states[i];
  569: 
  570: 	    transitions[stateno * (nbatoms + 1)] = state->type;
  571: 
  572: 	    for (j = 0;j < state->nbTrans;j++) {
  573: 		trans = &(state->trans[j]);
  574: 		if ((trans->to == -1) || (trans->atom == NULL))
  575: 		    continue;
  576:                 atomno = stringRemap[trans->atom->no];
  577: 		if ((trans->atom->data != NULL) && (transdata == NULL)) {
  578: 		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
  579: 			                            sizeof(void *));
  580: 		    if (transdata != NULL)
  581: 			memset(transdata, 0,
  582: 			       nbstates * nbatoms * sizeof(void *));
  583: 		    else {
  584: 			xmlRegexpErrMemory(ctxt, "compiling regexp");
  585: 			break;
  586: 		    }
  587: 		}
  588: 		targetno = stateRemap[trans->to];
  589: 		/*
  590: 		 * if the same atom can generate transitions to 2 different
  591: 		 * states then it means the automata is not determinist and
  592: 		 * the compact form can't be used !
  593: 		 */
  594: 		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
  595: 		if (prev != 0) {
  596: 		    if (prev != targetno + 1) {
  597: 			ret->determinist = 0;
  598: #ifdef DEBUG_COMPACTION
  599: 			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
  600: 			       i, j, trans->atom->no, trans->to, atomno, targetno);
  601: 			printf("       previous to is %d\n", prev);
  602: #endif
  603: 			if (transdata != NULL)
  604: 			    xmlFree(transdata);
  605: 			xmlFree(transitions);
  606: 			xmlFree(stateRemap);
  607: 			xmlFree(stringRemap);
  608: 			for (i = 0;i < nbatoms;i++)
  609: 			    xmlFree(stringMap[i]);
  610: 			xmlFree(stringMap);
  611: 			goto not_determ;
  612: 		    }
  613: 		} else {
  614: #if 0
  615: 		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
  616: 			   i, j, trans->atom->no, trans->to, atomno, targetno);
  617: #endif
  618: 		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
  619: 			targetno + 1; /* to avoid 0 */
  620: 		    if (transdata != NULL)
  621: 			transdata[stateno * nbatoms + atomno] =
  622: 			    trans->atom->data;
  623: 		}
  624: 	    }
  625: 	}
  626: 	ret->determinist = 1;
  627: #ifdef DEBUG_COMPACTION
  628: 	/*
  629: 	 * Debug
  630: 	 */
  631: 	for (i = 0;i < nbstates;i++) {
  632: 	    for (j = 0;j < nbatoms + 1;j++) {
  633:                 printf("%02d ", transitions[i * (nbatoms + 1) + j]);
  634: 	    }
  635: 	    printf("\n");
  636: 	}
  637: 	printf("\n");
  638: #endif
  639: 	/*
  640: 	 * Cleanup of the old data
  641: 	 */
  642: 	if (ret->states != NULL) {
  643: 	    for (i = 0;i < ret->nbStates;i++)
  644: 		xmlRegFreeState(ret->states[i]);
  645: 	    xmlFree(ret->states);
  646: 	}
  647: 	ret->states = NULL;
  648: 	ret->nbStates = 0;
  649: 	if (ret->atoms != NULL) {
  650: 	    for (i = 0;i < ret->nbAtoms;i++)
  651: 		xmlRegFreeAtom(ret->atoms[i]);
  652: 	    xmlFree(ret->atoms);
  653: 	}
  654: 	ret->atoms = NULL;
  655: 	ret->nbAtoms = 0;
  656: 
  657: 	ret->compact = transitions;
  658: 	ret->transdata = transdata;
  659: 	ret->stringMap = stringMap;
  660: 	ret->nbstrings = nbatoms;
  661: 	ret->nbstates = nbstates;
  662: 	xmlFree(stateRemap);
  663: 	xmlFree(stringRemap);
  664:     }
  665: not_determ:
  666:     ctxt->string = NULL;
  667:     ctxt->nbStates = 0;
  668:     ctxt->states = NULL;
  669:     ctxt->nbAtoms = 0;
  670:     ctxt->atoms = NULL;
  671:     ctxt->nbCounters = 0;
  672:     ctxt->counters = NULL;
  673:     return(ret);
  674: }
  675: 
  676: /**
  677:  * xmlRegNewParserCtxt:
  678:  * @string:  the string to parse
  679:  *
  680:  * Allocate a new regexp parser context
  681:  *
  682:  * Returns the new context or NULL in case of error
  683:  */
  684: static xmlRegParserCtxtPtr
  685: xmlRegNewParserCtxt(const xmlChar *string) {
  686:     xmlRegParserCtxtPtr ret;
  687: 
  688:     ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
  689:     if (ret == NULL)
  690: 	return(NULL);
  691:     memset(ret, 0, sizeof(xmlRegParserCtxt));
  692:     if (string != NULL)
  693: 	ret->string = xmlStrdup(string);
  694:     ret->cur = ret->string;
  695:     ret->neg = 0;
  696:     ret->negs = 0;
  697:     ret->error = 0;
  698:     ret->determinist = -1;
  699:     return(ret);
  700: }
  701: 
  702: /**
  703:  * xmlRegNewRange:
  704:  * @ctxt:  the regexp parser context
  705:  * @neg:  is that negative
  706:  * @type:  the type of range
  707:  * @start:  the start codepoint
  708:  * @end:  the end codepoint
  709:  *
  710:  * Allocate a new regexp range
  711:  *
  712:  * Returns the new range or NULL in case of error
  713:  */
  714: static xmlRegRangePtr
  715: xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
  716: 	       int neg, xmlRegAtomType type, int start, int end) {
  717:     xmlRegRangePtr ret;
  718: 
  719:     ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
  720:     if (ret == NULL) {
  721: 	xmlRegexpErrMemory(ctxt, "allocating range");
  722: 	return(NULL);
  723:     }
  724:     ret->neg = neg;
  725:     ret->type = type;
  726:     ret->start = start;
  727:     ret->end = end;
  728:     return(ret);
  729: }
  730: 
  731: /**
  732:  * xmlRegFreeRange:
  733:  * @range:  the regexp range
  734:  *
  735:  * Free a regexp range
  736:  */
  737: static void
  738: xmlRegFreeRange(xmlRegRangePtr range) {
  739:     if (range == NULL)
  740: 	return;
  741: 
  742:     if (range->blockName != NULL)
  743: 	xmlFree(range->blockName);
  744:     xmlFree(range);
  745: }
  746: 
  747: /**
  748:  * xmlRegCopyRange:
  749:  * @range:  the regexp range
  750:  *
  751:  * Copy a regexp range
  752:  *
  753:  * Returns the new copy or NULL in case of error.
  754:  */
  755: static xmlRegRangePtr
  756: xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
  757:     xmlRegRangePtr ret;
  758: 
  759:     if (range == NULL)
  760: 	return(NULL);
  761: 
  762:     ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
  763:                          range->end);
  764:     if (ret == NULL)
  765:         return(NULL);
  766:     if (range->blockName != NULL) {
  767: 	ret->blockName = xmlStrdup(range->blockName);
  768: 	if (ret->blockName == NULL) {
  769: 	    xmlRegexpErrMemory(ctxt, "allocating range");
  770: 	    xmlRegFreeRange(ret);
  771: 	    return(NULL);
  772: 	}
  773:     }
  774:     return(ret);
  775: }
  776: 
  777: /**
  778:  * xmlRegNewAtom:
  779:  * @ctxt:  the regexp parser context
  780:  * @type:  the type of atom
  781:  *
  782:  * Allocate a new atom
  783:  *
  784:  * Returns the new atom or NULL in case of error
  785:  */
  786: static xmlRegAtomPtr
  787: xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
  788:     xmlRegAtomPtr ret;
  789: 
  790:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  791:     if (ret == NULL) {
  792: 	xmlRegexpErrMemory(ctxt, "allocating atom");
  793: 	return(NULL);
  794:     }
  795:     memset(ret, 0, sizeof(xmlRegAtom));
  796:     ret->type = type;
  797:     ret->quant = XML_REGEXP_QUANT_ONCE;
  798:     ret->min = 0;
  799:     ret->max = 0;
  800:     return(ret);
  801: }
  802: 
  803: /**
  804:  * xmlRegFreeAtom:
  805:  * @atom:  the regexp atom
  806:  *
  807:  * Free a regexp atom
  808:  */
  809: static void
  810: xmlRegFreeAtom(xmlRegAtomPtr atom) {
  811:     int i;
  812: 
  813:     if (atom == NULL)
  814: 	return;
  815: 
  816:     for (i = 0;i < atom->nbRanges;i++)
  817: 	xmlRegFreeRange(atom->ranges[i]);
  818:     if (atom->ranges != NULL)
  819: 	xmlFree(atom->ranges);
  820:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
  821: 	xmlFree(atom->valuep);
  822:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
  823: 	xmlFree(atom->valuep2);
  824:     if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
  825: 	xmlFree(atom->valuep);
  826:     xmlFree(atom);
  827: }
  828: 
  829: /**
  830:  * xmlRegCopyAtom:
  831:  * @ctxt:  the regexp parser context
  832:  * @atom:  the oiginal atom
  833:  *
  834:  * Allocate a new regexp range
  835:  *
  836:  * Returns the new atom or NULL in case of error
  837:  */
  838: static xmlRegAtomPtr
  839: xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
  840:     xmlRegAtomPtr ret;
  841: 
  842:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  843:     if (ret == NULL) {
  844: 	xmlRegexpErrMemory(ctxt, "copying atom");
  845: 	return(NULL);
  846:     }
  847:     memset(ret, 0, sizeof(xmlRegAtom));
  848:     ret->type = atom->type;
  849:     ret->quant = atom->quant;
  850:     ret->min = atom->min;
  851:     ret->max = atom->max;
  852:     if (atom->nbRanges > 0) {
  853:         int i;
  854: 
  855:         ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
  856: 	                                           atom->nbRanges);
  857: 	if (ret->ranges == NULL) {
  858: 	    xmlRegexpErrMemory(ctxt, "copying atom");
  859: 	    goto error;
  860: 	}
  861: 	for (i = 0;i < atom->nbRanges;i++) {
  862: 	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
  863: 	    if (ret->ranges[i] == NULL)
  864: 	        goto error;
  865: 	    ret->nbRanges = i + 1;
  866: 	}
  867:     }
  868:     return(ret);
  869: 
  870: error:
  871:     xmlRegFreeAtom(ret);
  872:     return(NULL);
  873: }
  874: 
  875: static xmlRegStatePtr
  876: xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
  877:     xmlRegStatePtr ret;
  878: 
  879:     ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
  880:     if (ret == NULL) {
  881: 	xmlRegexpErrMemory(ctxt, "allocating state");
  882: 	return(NULL);
  883:     }
  884:     memset(ret, 0, sizeof(xmlRegState));
  885:     ret->type = XML_REGEXP_TRANS_STATE;
  886:     ret->mark = XML_REGEXP_MARK_NORMAL;
  887:     return(ret);
  888: }
  889: 
  890: /**
  891:  * xmlRegFreeState:
  892:  * @state:  the regexp state
  893:  *
  894:  * Free a regexp state
  895:  */
  896: static void
  897: xmlRegFreeState(xmlRegStatePtr state) {
  898:     if (state == NULL)
  899: 	return;
  900: 
  901:     if (state->trans != NULL)
  902: 	xmlFree(state->trans);
  903:     if (state->transTo != NULL)
  904: 	xmlFree(state->transTo);
  905:     xmlFree(state);
  906: }
  907: 
  908: /**
  909:  * xmlRegFreeParserCtxt:
  910:  * @ctxt:  the regexp parser context
  911:  *
  912:  * Free a regexp parser context
  913:  */
  914: static void
  915: xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  916:     int i;
  917:     if (ctxt == NULL)
  918: 	return;
  919: 
  920:     if (ctxt->string != NULL)
  921: 	xmlFree(ctxt->string);
  922:     if (ctxt->states != NULL) {
  923: 	for (i = 0;i < ctxt->nbStates;i++)
  924: 	    xmlRegFreeState(ctxt->states[i]);
  925: 	xmlFree(ctxt->states);
  926:     }
  927:     if (ctxt->atoms != NULL) {
  928: 	for (i = 0;i < ctxt->nbAtoms;i++)
  929: 	    xmlRegFreeAtom(ctxt->atoms[i]);
  930: 	xmlFree(ctxt->atoms);
  931:     }
  932:     if (ctxt->counters != NULL)
  933: 	xmlFree(ctxt->counters);
  934:     xmlFree(ctxt);
  935: }
  936: 
  937: /************************************************************************
  938:  *									*
  939:  *			Display of Data structures			*
  940:  *									*
  941:  ************************************************************************/
  942: 
  943: static void
  944: xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
  945:     switch (type) {
  946:         case XML_REGEXP_EPSILON:
  947: 	    fprintf(output, "epsilon "); break;
  948:         case XML_REGEXP_CHARVAL:
  949: 	    fprintf(output, "charval "); break;
  950:         case XML_REGEXP_RANGES:
  951: 	    fprintf(output, "ranges "); break;
  952:         case XML_REGEXP_SUBREG:
  953: 	    fprintf(output, "subexpr "); break;
  954:         case XML_REGEXP_STRING:
  955: 	    fprintf(output, "string "); break;
  956:         case XML_REGEXP_ANYCHAR:
  957: 	    fprintf(output, "anychar "); break;
  958:         case XML_REGEXP_ANYSPACE:
  959: 	    fprintf(output, "anyspace "); break;
  960:         case XML_REGEXP_NOTSPACE:
  961: 	    fprintf(output, "notspace "); break;
  962:         case XML_REGEXP_INITNAME:
  963: 	    fprintf(output, "initname "); break;
  964:         case XML_REGEXP_NOTINITNAME:
  965: 	    fprintf(output, "notinitname "); break;
  966:         case XML_REGEXP_NAMECHAR:
  967: 	    fprintf(output, "namechar "); break;
  968:         case XML_REGEXP_NOTNAMECHAR:
  969: 	    fprintf(output, "notnamechar "); break;
  970:         case XML_REGEXP_DECIMAL:
  971: 	    fprintf(output, "decimal "); break;
  972:         case XML_REGEXP_NOTDECIMAL:
  973: 	    fprintf(output, "notdecimal "); break;
  974:         case XML_REGEXP_REALCHAR:
  975: 	    fprintf(output, "realchar "); break;
  976:         case XML_REGEXP_NOTREALCHAR:
  977: 	    fprintf(output, "notrealchar "); break;
  978:         case XML_REGEXP_LETTER:
  979:             fprintf(output, "LETTER "); break;
  980:         case XML_REGEXP_LETTER_UPPERCASE:
  981:             fprintf(output, "LETTER_UPPERCASE "); break;
  982:         case XML_REGEXP_LETTER_LOWERCASE:
  983:             fprintf(output, "LETTER_LOWERCASE "); break;
  984:         case XML_REGEXP_LETTER_TITLECASE:
  985:             fprintf(output, "LETTER_TITLECASE "); break;
  986:         case XML_REGEXP_LETTER_MODIFIER:
  987:             fprintf(output, "LETTER_MODIFIER "); break;
  988:         case XML_REGEXP_LETTER_OTHERS:
  989:             fprintf(output, "LETTER_OTHERS "); break;
  990:         case XML_REGEXP_MARK:
  991:             fprintf(output, "MARK "); break;
  992:         case XML_REGEXP_MARK_NONSPACING:
  993:             fprintf(output, "MARK_NONSPACING "); break;
  994:         case XML_REGEXP_MARK_SPACECOMBINING:
  995:             fprintf(output, "MARK_SPACECOMBINING "); break;
  996:         case XML_REGEXP_MARK_ENCLOSING:
  997:             fprintf(output, "MARK_ENCLOSING "); break;
  998:         case XML_REGEXP_NUMBER:
  999:             fprintf(output, "NUMBER "); break;
 1000:         case XML_REGEXP_NUMBER_DECIMAL:
 1001:             fprintf(output, "NUMBER_DECIMAL "); break;
 1002:         case XML_REGEXP_NUMBER_LETTER:
 1003:             fprintf(output, "NUMBER_LETTER "); break;
 1004:         case XML_REGEXP_NUMBER_OTHERS:
 1005:             fprintf(output, "NUMBER_OTHERS "); break;
 1006:         case XML_REGEXP_PUNCT:
 1007:             fprintf(output, "PUNCT "); break;
 1008:         case XML_REGEXP_PUNCT_CONNECTOR:
 1009:             fprintf(output, "PUNCT_CONNECTOR "); break;
 1010:         case XML_REGEXP_PUNCT_DASH:
 1011:             fprintf(output, "PUNCT_DASH "); break;
 1012:         case XML_REGEXP_PUNCT_OPEN:
 1013:             fprintf(output, "PUNCT_OPEN "); break;
 1014:         case XML_REGEXP_PUNCT_CLOSE:
 1015:             fprintf(output, "PUNCT_CLOSE "); break;
 1016:         case XML_REGEXP_PUNCT_INITQUOTE:
 1017:             fprintf(output, "PUNCT_INITQUOTE "); break;
 1018:         case XML_REGEXP_PUNCT_FINQUOTE:
 1019:             fprintf(output, "PUNCT_FINQUOTE "); break;
 1020:         case XML_REGEXP_PUNCT_OTHERS:
 1021:             fprintf(output, "PUNCT_OTHERS "); break;
 1022:         case XML_REGEXP_SEPAR:
 1023:             fprintf(output, "SEPAR "); break;
 1024:         case XML_REGEXP_SEPAR_SPACE:
 1025:             fprintf(output, "SEPAR_SPACE "); break;
 1026:         case XML_REGEXP_SEPAR_LINE:
 1027:             fprintf(output, "SEPAR_LINE "); break;
 1028:         case XML_REGEXP_SEPAR_PARA:
 1029:             fprintf(output, "SEPAR_PARA "); break;
 1030:         case XML_REGEXP_SYMBOL:
 1031:             fprintf(output, "SYMBOL "); break;
 1032:         case XML_REGEXP_SYMBOL_MATH:
 1033:             fprintf(output, "SYMBOL_MATH "); break;
 1034:         case XML_REGEXP_SYMBOL_CURRENCY:
 1035:             fprintf(output, "SYMBOL_CURRENCY "); break;
 1036:         case XML_REGEXP_SYMBOL_MODIFIER:
 1037:             fprintf(output, "SYMBOL_MODIFIER "); break;
 1038:         case XML_REGEXP_SYMBOL_OTHERS:
 1039:             fprintf(output, "SYMBOL_OTHERS "); break;
 1040:         case XML_REGEXP_OTHER:
 1041:             fprintf(output, "OTHER "); break;
 1042:         case XML_REGEXP_OTHER_CONTROL:
 1043:             fprintf(output, "OTHER_CONTROL "); break;
 1044:         case XML_REGEXP_OTHER_FORMAT:
 1045:             fprintf(output, "OTHER_FORMAT "); break;
 1046:         case XML_REGEXP_OTHER_PRIVATE:
 1047:             fprintf(output, "OTHER_PRIVATE "); break;
 1048:         case XML_REGEXP_OTHER_NA:
 1049:             fprintf(output, "OTHER_NA "); break;
 1050:         case XML_REGEXP_BLOCK_NAME:
 1051: 	    fprintf(output, "BLOCK "); break;
 1052:     }
 1053: }
 1054: 
 1055: static void
 1056: xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
 1057:     switch (type) {
 1058:         case XML_REGEXP_QUANT_EPSILON:
 1059: 	    fprintf(output, "epsilon "); break;
 1060:         case XML_REGEXP_QUANT_ONCE:
 1061: 	    fprintf(output, "once "); break;
 1062:         case XML_REGEXP_QUANT_OPT:
 1063: 	    fprintf(output, "? "); break;
 1064:         case XML_REGEXP_QUANT_MULT:
 1065: 	    fprintf(output, "* "); break;
 1066:         case XML_REGEXP_QUANT_PLUS:
 1067: 	    fprintf(output, "+ "); break;
 1068: 	case XML_REGEXP_QUANT_RANGE:
 1069: 	    fprintf(output, "range "); break;
 1070: 	case XML_REGEXP_QUANT_ONCEONLY:
 1071: 	    fprintf(output, "onceonly "); break;
 1072: 	case XML_REGEXP_QUANT_ALL:
 1073: 	    fprintf(output, "all "); break;
 1074:     }
 1075: }
 1076: static void
 1077: xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
 1078:     fprintf(output, "  range: ");
 1079:     if (range->neg)
 1080: 	fprintf(output, "negative ");
 1081:     xmlRegPrintAtomType(output, range->type);
 1082:     fprintf(output, "%c - %c\n", range->start, range->end);
 1083: }
 1084: 
 1085: static void
 1086: xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
 1087:     fprintf(output, " atom: ");
 1088:     if (atom == NULL) {
 1089: 	fprintf(output, "NULL\n");
 1090: 	return;
 1091:     }
 1092:     if (atom->neg)
 1093:         fprintf(output, "not ");
 1094:     xmlRegPrintAtomType(output, atom->type);
 1095:     xmlRegPrintQuantType(output, atom->quant);
 1096:     if (atom->quant == XML_REGEXP_QUANT_RANGE)
 1097: 	fprintf(output, "%d-%d ", atom->min, atom->max);
 1098:     if (atom->type == XML_REGEXP_STRING)
 1099: 	fprintf(output, "'%s' ", (char *) atom->valuep);
 1100:     if (atom->type == XML_REGEXP_CHARVAL)
 1101: 	fprintf(output, "char %c\n", atom->codepoint);
 1102:     else if (atom->type == XML_REGEXP_RANGES) {
 1103: 	int i;
 1104: 	fprintf(output, "%d entries\n", atom->nbRanges);
 1105: 	for (i = 0; i < atom->nbRanges;i++)
 1106: 	    xmlRegPrintRange(output, atom->ranges[i]);
 1107:     } else if (atom->type == XML_REGEXP_SUBREG) {
 1108: 	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
 1109:     } else {
 1110: 	fprintf(output, "\n");
 1111:     }
 1112: }
 1113: 
 1114: static void
 1115: xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
 1116:     fprintf(output, "  trans: ");
 1117:     if (trans == NULL) {
 1118: 	fprintf(output, "NULL\n");
 1119: 	return;
 1120:     }
 1121:     if (trans->to < 0) {
 1122: 	fprintf(output, "removed\n");
 1123: 	return;
 1124:     }
 1125:     if (trans->nd != 0) {
 1126: 	if (trans->nd == 2)
 1127: 	    fprintf(output, "last not determinist, ");
 1128: 	else
 1129: 	    fprintf(output, "not determinist, ");
 1130:     }
 1131:     if (trans->counter >= 0) {
 1132: 	fprintf(output, "counted %d, ", trans->counter);
 1133:     }
 1134:     if (trans->count == REGEXP_ALL_COUNTER) {
 1135: 	fprintf(output, "all transition, ");
 1136:     } else if (trans->count >= 0) {
 1137: 	fprintf(output, "count based %d, ", trans->count);
 1138:     }
 1139:     if (trans->atom == NULL) {
 1140: 	fprintf(output, "epsilon to %d\n", trans->to);
 1141: 	return;
 1142:     }
 1143:     if (trans->atom->type == XML_REGEXP_CHARVAL)
 1144: 	fprintf(output, "char %c ", trans->atom->codepoint);
 1145:     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
 1146: }
 1147: 
 1148: static void
 1149: xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
 1150:     int i;
 1151: 
 1152:     fprintf(output, " state: ");
 1153:     if (state == NULL) {
 1154: 	fprintf(output, "NULL\n");
 1155: 	return;
 1156:     }
 1157:     if (state->type == XML_REGEXP_START_STATE)
 1158: 	fprintf(output, "START ");
 1159:     if (state->type == XML_REGEXP_FINAL_STATE)
 1160: 	fprintf(output, "FINAL ");
 1161: 
 1162:     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
 1163:     for (i = 0;i < state->nbTrans; i++) {
 1164: 	xmlRegPrintTrans(output, &(state->trans[i]));
 1165:     }
 1166: }
 1167: 
 1168: #ifdef DEBUG_REGEXP_GRAPH
 1169: static void
 1170: xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
 1171:     int i;
 1172: 
 1173:     fprintf(output, " ctxt: ");
 1174:     if (ctxt == NULL) {
 1175: 	fprintf(output, "NULL\n");
 1176: 	return;
 1177:     }
 1178:     fprintf(output, "'%s' ", ctxt->string);
 1179:     if (ctxt->error)
 1180: 	fprintf(output, "error ");
 1181:     if (ctxt->neg)
 1182: 	fprintf(output, "neg ");
 1183:     fprintf(output, "\n");
 1184:     fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
 1185:     for (i = 0;i < ctxt->nbAtoms; i++) {
 1186: 	fprintf(output, " %02d ", i);
 1187: 	xmlRegPrintAtom(output, ctxt->atoms[i]);
 1188:     }
 1189:     if (ctxt->atom != NULL) {
 1190: 	fprintf(output, "current atom:\n");
 1191: 	xmlRegPrintAtom(output, ctxt->atom);
 1192:     }
 1193:     fprintf(output, "%d states:", ctxt->nbStates);
 1194:     if (ctxt->start != NULL)
 1195: 	fprintf(output, " start: %d", ctxt->start->no);
 1196:     if (ctxt->end != NULL)
 1197: 	fprintf(output, " end: %d", ctxt->end->no);
 1198:     fprintf(output, "\n");
 1199:     for (i = 0;i < ctxt->nbStates; i++) {
 1200: 	xmlRegPrintState(output, ctxt->states[i]);
 1201:     }
 1202:     fprintf(output, "%d counters:\n", ctxt->nbCounters);
 1203:     for (i = 0;i < ctxt->nbCounters; i++) {
 1204: 	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
 1205: 		                                ctxt->counters[i].max);
 1206:     }
 1207: }
 1208: #endif
 1209: 
 1210: /************************************************************************
 1211:  *									*
 1212:  *		 Finite Automata structures manipulations		*
 1213:  *									*
 1214:  ************************************************************************/
 1215: 
 1216: static void
 1217: xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
 1218: 	           int neg, xmlRegAtomType type, int start, int end,
 1219: 		   xmlChar *blockName) {
 1220:     xmlRegRangePtr range;
 1221: 
 1222:     if (atom == NULL) {
 1223: 	ERROR("add range: atom is NULL");
 1224: 	return;
 1225:     }
 1226:     if (atom->type != XML_REGEXP_RANGES) {
 1227: 	ERROR("add range: atom is not ranges");
 1228: 	return;
 1229:     }
 1230:     if (atom->maxRanges == 0) {
 1231: 	atom->maxRanges = 4;
 1232: 	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
 1233: 		                             sizeof(xmlRegRangePtr));
 1234: 	if (atom->ranges == NULL) {
 1235: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1236: 	    atom->maxRanges = 0;
 1237: 	    return;
 1238: 	}
 1239:     } else if (atom->nbRanges >= atom->maxRanges) {
 1240: 	xmlRegRangePtr *tmp;
 1241: 	atom->maxRanges *= 2;
 1242: 	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
 1243: 		                             sizeof(xmlRegRangePtr));
 1244: 	if (tmp == NULL) {
 1245: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1246: 	    atom->maxRanges /= 2;
 1247: 	    return;
 1248: 	}
 1249: 	atom->ranges = tmp;
 1250:     }
 1251:     range = xmlRegNewRange(ctxt, neg, type, start, end);
 1252:     if (range == NULL)
 1253: 	return;
 1254:     range->blockName = blockName;
 1255:     atom->ranges[atom->nbRanges++] = range;
 1256: 
 1257: }
 1258: 
 1259: static int
 1260: xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
 1261:     if (ctxt->maxCounters == 0) {
 1262: 	ctxt->maxCounters = 4;
 1263: 	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
 1264: 		                             sizeof(xmlRegCounter));
 1265: 	if (ctxt->counters == NULL) {
 1266: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1267: 	    ctxt->maxCounters = 0;
 1268: 	    return(-1);
 1269: 	}
 1270:     } else if (ctxt->nbCounters >= ctxt->maxCounters) {
 1271: 	xmlRegCounter *tmp;
 1272: 	ctxt->maxCounters *= 2;
 1273: 	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
 1274: 		                           sizeof(xmlRegCounter));
 1275: 	if (tmp == NULL) {
 1276: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1277: 	    ctxt->maxCounters /= 2;
 1278: 	    return(-1);
 1279: 	}
 1280: 	ctxt->counters = tmp;
 1281:     }
 1282:     ctxt->counters[ctxt->nbCounters].min = -1;
 1283:     ctxt->counters[ctxt->nbCounters].max = -1;
 1284:     return(ctxt->nbCounters++);
 1285: }
 1286: 
 1287: static int
 1288: xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
 1289:     if (atom == NULL) {
 1290: 	ERROR("atom push: atom is NULL");
 1291: 	return(-1);
 1292:     }
 1293:     if (ctxt->maxAtoms == 0) {
 1294: 	ctxt->maxAtoms = 4;
 1295: 	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
 1296: 		                             sizeof(xmlRegAtomPtr));
 1297: 	if (ctxt->atoms == NULL) {
 1298: 	    xmlRegexpErrMemory(ctxt, "pushing atom");
 1299: 	    ctxt->maxAtoms = 0;
 1300: 	    return(-1);
 1301: 	}
 1302:     } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
 1303: 	xmlRegAtomPtr *tmp;
 1304: 	ctxt->maxAtoms *= 2;
 1305: 	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
 1306: 		                             sizeof(xmlRegAtomPtr));
 1307: 	if (tmp == NULL) {
 1308: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1309: 	    ctxt->maxAtoms /= 2;
 1310: 	    return(-1);
 1311: 	}
 1312: 	ctxt->atoms = tmp;
 1313:     }
 1314:     atom->no = ctxt->nbAtoms;
 1315:     ctxt->atoms[ctxt->nbAtoms++] = atom;
 1316:     return(0);
 1317: }
 1318: 
 1319: static void
 1320: xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
 1321:                       int from) {
 1322:     if (target->maxTransTo == 0) {
 1323: 	target->maxTransTo = 8;
 1324: 	target->transTo = (int *) xmlMalloc(target->maxTransTo *
 1325: 		                             sizeof(int));
 1326: 	if (target->transTo == NULL) {
 1327: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1328: 	    target->maxTransTo = 0;
 1329: 	    return;
 1330: 	}
 1331:     } else if (target->nbTransTo >= target->maxTransTo) {
 1332: 	int *tmp;
 1333: 	target->maxTransTo *= 2;
 1334: 	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
 1335: 		                             sizeof(int));
 1336: 	if (tmp == NULL) {
 1337: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1338: 	    target->maxTransTo /= 2;
 1339: 	    return;
 1340: 	}
 1341: 	target->transTo = tmp;
 1342:     }
 1343:     target->transTo[target->nbTransTo] = from;
 1344:     target->nbTransTo++;
 1345: }
 1346: 
 1347: static void
 1348: xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 1349: 	            xmlRegAtomPtr atom, xmlRegStatePtr target,
 1350: 		    int counter, int count) {
 1351: 
 1352:     int nrtrans;
 1353: 
 1354:     if (state == NULL) {
 1355: 	ERROR("add state: state is NULL");
 1356: 	return;
 1357:     }
 1358:     if (target == NULL) {
 1359: 	ERROR("add state: target is NULL");
 1360: 	return;
 1361:     }
 1362:     /*
 1363:      * Other routines follow the philosophy 'When in doubt, add a transition'
 1364:      * so we check here whether such a transition is already present and, if
 1365:      * so, silently ignore this request.
 1366:      */
 1367: 
 1368:     for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
 1369: 	xmlRegTransPtr trans = &(state->trans[nrtrans]);
 1370: 	if ((trans->atom == atom) &&
 1371: 	    (trans->to == target->no) &&
 1372: 	    (trans->counter == counter) &&
 1373: 	    (trans->count == count)) {
 1374: #ifdef DEBUG_REGEXP_GRAPH
 1375: 	    printf("Ignoring duplicate transition from %d to %d\n",
 1376: 		    state->no, target->no);
 1377: #endif
 1378: 	    return;
 1379: 	}
 1380:     }
 1381: 
 1382:     if (state->maxTrans == 0) {
 1383: 	state->maxTrans = 8;
 1384: 	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
 1385: 		                             sizeof(xmlRegTrans));
 1386: 	if (state->trans == NULL) {
 1387: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1388: 	    state->maxTrans = 0;
 1389: 	    return;
 1390: 	}
 1391:     } else if (state->nbTrans >= state->maxTrans) {
 1392: 	xmlRegTrans *tmp;
 1393: 	state->maxTrans *= 2;
 1394: 	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
 1395: 		                             sizeof(xmlRegTrans));
 1396: 	if (tmp == NULL) {
 1397: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1398: 	    state->maxTrans /= 2;
 1399: 	    return;
 1400: 	}
 1401: 	state->trans = tmp;
 1402:     }
 1403: #ifdef DEBUG_REGEXP_GRAPH
 1404:     printf("Add trans from %d to %d ", state->no, target->no);
 1405:     if (count == REGEXP_ALL_COUNTER)
 1406: 	printf("all transition\n");
 1407:     else if (count >= 0)
 1408: 	printf("count based %d\n", count);
 1409:     else if (counter >= 0)
 1410: 	printf("counted %d\n", counter);
 1411:     else if (atom == NULL)
 1412: 	printf("epsilon transition\n");
 1413:     else if (atom != NULL)
 1414:         xmlRegPrintAtom(stdout, atom);
 1415: #endif
 1416: 
 1417:     state->trans[state->nbTrans].atom = atom;
 1418:     state->trans[state->nbTrans].to = target->no;
 1419:     state->trans[state->nbTrans].counter = counter;
 1420:     state->trans[state->nbTrans].count = count;
 1421:     state->trans[state->nbTrans].nd = 0;
 1422:     state->nbTrans++;
 1423:     xmlRegStateAddTransTo(ctxt, target, state->no);
 1424: }
 1425: 
 1426: static int
 1427: xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
 1428:     if (state == NULL) return(-1);
 1429:     if (ctxt->maxStates == 0) {
 1430: 	ctxt->maxStates = 4;
 1431: 	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
 1432: 		                             sizeof(xmlRegStatePtr));
 1433: 	if (ctxt->states == NULL) {
 1434: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1435: 	    ctxt->maxStates = 0;
 1436: 	    return(-1);
 1437: 	}
 1438:     } else if (ctxt->nbStates >= ctxt->maxStates) {
 1439: 	xmlRegStatePtr *tmp;
 1440: 	ctxt->maxStates *= 2;
 1441: 	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
 1442: 		                             sizeof(xmlRegStatePtr));
 1443: 	if (tmp == NULL) {
 1444: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1445: 	    ctxt->maxStates /= 2;
 1446: 	    return(-1);
 1447: 	}
 1448: 	ctxt->states = tmp;
 1449:     }
 1450:     state->no = ctxt->nbStates;
 1451:     ctxt->states[ctxt->nbStates++] = state;
 1452:     return(0);
 1453: }
 1454: 
 1455: /**
 1456:  * xmlFAGenerateAllTransition:
 1457:  * @ctxt:  a regexp parser context
 1458:  * @from:  the from state
 1459:  * @to:  the target state or NULL for building a new one
 1460:  * @lax:
 1461:  *
 1462:  */
 1463: static void
 1464: xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
 1465: 			   xmlRegStatePtr from, xmlRegStatePtr to,
 1466: 			   int lax) {
 1467:     if (to == NULL) {
 1468: 	to = xmlRegNewState(ctxt);
 1469: 	xmlRegStatePush(ctxt, to);
 1470: 	ctxt->state = to;
 1471:     }
 1472:     if (lax)
 1473: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
 1474:     else
 1475: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
 1476: }
 1477: 
 1478: /**
 1479:  * xmlFAGenerateEpsilonTransition:
 1480:  * @ctxt:  a regexp parser context
 1481:  * @from:  the from state
 1482:  * @to:  the target state or NULL for building a new one
 1483:  *
 1484:  */
 1485: static void
 1486: xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1487: 			       xmlRegStatePtr from, xmlRegStatePtr to) {
 1488:     if (to == NULL) {
 1489: 	to = xmlRegNewState(ctxt);
 1490: 	xmlRegStatePush(ctxt, to);
 1491: 	ctxt->state = to;
 1492:     }
 1493:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
 1494: }
 1495: 
 1496: /**
 1497:  * xmlFAGenerateCountedEpsilonTransition:
 1498:  * @ctxt:  a regexp parser context
 1499:  * @from:  the from state
 1500:  * @to:  the target state or NULL for building a new one
 1501:  * counter:  the counter for that transition
 1502:  *
 1503:  */
 1504: static void
 1505: xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1506: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1507:     if (to == NULL) {
 1508: 	to = xmlRegNewState(ctxt);
 1509: 	xmlRegStatePush(ctxt, to);
 1510: 	ctxt->state = to;
 1511:     }
 1512:     xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
 1513: }
 1514: 
 1515: /**
 1516:  * xmlFAGenerateCountedTransition:
 1517:  * @ctxt:  a regexp parser context
 1518:  * @from:  the from state
 1519:  * @to:  the target state or NULL for building a new one
 1520:  * counter:  the counter for that transition
 1521:  *
 1522:  */
 1523: static void
 1524: xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
 1525: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1526:     if (to == NULL) {
 1527: 	to = xmlRegNewState(ctxt);
 1528: 	xmlRegStatePush(ctxt, to);
 1529: 	ctxt->state = to;
 1530:     }
 1531:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
 1532: }
 1533: 
 1534: /**
 1535:  * xmlFAGenerateTransitions:
 1536:  * @ctxt:  a regexp parser context
 1537:  * @from:  the from state
 1538:  * @to:  the target state or NULL for building a new one
 1539:  * @atom:  the atom generating the transition
 1540:  *
 1541:  * Returns 0 if success and -1 in case of error.
 1542:  */
 1543: static int
 1544: xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 1545: 	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
 1546:     xmlRegStatePtr end;
 1547: 
 1548:     if (atom == NULL) {
 1549: 	ERROR("genrate transition: atom == NULL");
 1550: 	return(-1);
 1551:     }
 1552:     if (atom->type == XML_REGEXP_SUBREG) {
 1553: 	/*
 1554: 	 * this is a subexpression handling one should not need to
 1555: 	 * create a new node except for XML_REGEXP_QUANT_RANGE.
 1556: 	 */
 1557: 	if (xmlRegAtomPush(ctxt, atom) < 0) {
 1558: 	    return(-1);
 1559: 	}
 1560: 	if ((to != NULL) && (atom->stop != to) &&
 1561: 	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
 1562: 	    /*
 1563: 	     * Generate an epsilon transition to link to the target
 1564: 	     */
 1565: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1566: #ifdef DV
 1567: 	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) &&
 1568: 		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
 1569: 	    to = xmlRegNewState(ctxt);
 1570: 	    xmlRegStatePush(ctxt, to);
 1571: 	    ctxt->state = to;
 1572: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1573: #endif
 1574: 	}
 1575: 	switch (atom->quant) {
 1576: 	    case XML_REGEXP_QUANT_OPT:
 1577: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1578: 		/*
 1579: 		 * transition done to the state after end of atom.
 1580: 		 *      1. set transition from atom start to new state
 1581: 		 *      2. set transition from atom end to this state.
 1582: 		 */
 1583:                 if (to == NULL) {
 1584:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
 1585:                     xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
 1586:                                                    ctxt->state);
 1587:                 } else {
 1588:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
 1589:                 }
 1590: 		break;
 1591: 	    case XML_REGEXP_QUANT_MULT:
 1592: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1593: 		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
 1594: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1595: 		break;
 1596: 	    case XML_REGEXP_QUANT_PLUS:
 1597: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1598: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1599: 		break;
 1600: 	    case XML_REGEXP_QUANT_RANGE: {
 1601: 		int counter;
 1602: 		xmlRegStatePtr inter, newstate;
 1603: 
 1604: 		/*
 1605: 		 * create the final state now if needed
 1606: 		 */
 1607: 		if (to != NULL) {
 1608: 		    newstate = to;
 1609: 		} else {
 1610: 		    newstate = xmlRegNewState(ctxt);
 1611: 		    xmlRegStatePush(ctxt, newstate);
 1612: 		}
 1613: 
 1614: 		/*
 1615: 		 * The principle here is to use counted transition
 1616: 		 * to avoid explosion in the number of states in the
 1617: 		 * graph. This is clearly more complex but should not
 1618: 		 * be exploitable at runtime.
 1619: 		 */
 1620: 		if ((atom->min == 0) && (atom->start0 == NULL)) {
 1621: 		    xmlRegAtomPtr copy;
 1622: 		    /*
 1623: 		     * duplicate a transition based on atom to count next
 1624: 		     * occurences after 1. We cannot loop to atom->start
 1625: 		     * directly because we need an epsilon transition to
 1626: 		     * newstate.
 1627: 		     */
 1628: 		     /* ???? For some reason it seems we never reach that
 1629: 		        case, I suppose this got optimized out before when
 1630: 			building the automata */
 1631: 		    copy = xmlRegCopyAtom(ctxt, atom);
 1632: 		    if (copy == NULL)
 1633: 		        return(-1);
 1634: 		    copy->quant = XML_REGEXP_QUANT_ONCE;
 1635: 		    copy->min = 0;
 1636: 		    copy->max = 0;
 1637: 
 1638: 		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
 1639: 		        < 0)
 1640: 			return(-1);
 1641: 		    inter = ctxt->state;
 1642: 		    counter = xmlRegGetCounter(ctxt);
 1643: 		    ctxt->counters[counter].min = atom->min - 1;
 1644: 		    ctxt->counters[counter].max = atom->max - 1;
 1645: 		    /* count the number of times we see it again */
 1646: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
 1647: 						   atom->stop, counter);
 1648: 		    /* allow a way out based on the count */
 1649: 		    xmlFAGenerateCountedTransition(ctxt, inter,
 1650: 			                           newstate, counter);
 1651: 		    /* and also allow a direct exit for 0 */
 1652: 		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
 1653: 		                                   newstate);
 1654: 		} else {
 1655: 		    /*
 1656: 		     * either we need the atom at least once or there
 1657: 		     * is an atom->start0 allowing to easilly plug the
 1658: 		     * epsilon transition.
 1659: 		     */
 1660: 		    counter = xmlRegGetCounter(ctxt);
 1661: 		    ctxt->counters[counter].min = atom->min - 1;
 1662: 		    ctxt->counters[counter].max = atom->max - 1;
 1663: 		    /* count the number of times we see it again */
 1664: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
 1665: 						   atom->start, counter);
 1666: 		    /* allow a way out based on the count */
 1667: 		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
 1668: 			                           newstate, counter);
 1669: 		    /* and if needed allow a direct exit for 0 */
 1670: 		    if (atom->min == 0)
 1671: 			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
 1672: 						       newstate);
 1673: 
 1674: 		}
 1675: 		atom->min = 0;
 1676: 		atom->max = 0;
 1677: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1678: 		ctxt->state = newstate;
 1679: 	    }
 1680: 	    default:
 1681: 		break;
 1682: 	}
 1683: 	return(0);
 1684:     }
 1685:     if ((atom->min == 0) && (atom->max == 0) &&
 1686:                (atom->quant == XML_REGEXP_QUANT_RANGE)) {
 1687:         /*
 1688: 	 * we can discard the atom and generate an epsilon transition instead
 1689: 	 */
 1690: 	if (to == NULL) {
 1691: 	    to = xmlRegNewState(ctxt);
 1692: 	    if (to != NULL)
 1693: 		xmlRegStatePush(ctxt, to);
 1694: 	    else {
 1695: 		return(-1);
 1696: 	    }
 1697: 	}
 1698: 	xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1699: 	ctxt->state = to;
 1700: 	xmlRegFreeAtom(atom);
 1701: 	return(0);
 1702:     }
 1703:     if (to == NULL) {
 1704: 	to = xmlRegNewState(ctxt);
 1705: 	if (to != NULL)
 1706: 	    xmlRegStatePush(ctxt, to);
 1707: 	else {
 1708: 	    return(-1);
 1709: 	}
 1710:     }
 1711:     end = to;
 1712:     if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
 1713:         (atom->quant == XML_REGEXP_QUANT_PLUS)) {
 1714: 	/*
 1715: 	 * Do not pollute the target state by adding transitions from
 1716: 	 * it as it is likely to be the shared target of multiple branches.
 1717: 	 * So isolate with an epsilon transition.
 1718: 	 */
 1719:         xmlRegStatePtr tmp;
 1720: 
 1721: 	tmp = xmlRegNewState(ctxt);
 1722: 	if (tmp != NULL)
 1723: 	    xmlRegStatePush(ctxt, tmp);
 1724: 	else {
 1725: 	    return(-1);
 1726: 	}
 1727: 	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
 1728: 	to = tmp;
 1729:     }
 1730:     if (xmlRegAtomPush(ctxt, atom) < 0) {
 1731: 	return(-1);
 1732:     }
 1733:     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
 1734:     ctxt->state = end;
 1735:     switch (atom->quant) {
 1736: 	case XML_REGEXP_QUANT_OPT:
 1737: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1738: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1739: 	    break;
 1740: 	case XML_REGEXP_QUANT_MULT:
 1741: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1742: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1743: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1744: 	    break;
 1745: 	case XML_REGEXP_QUANT_PLUS:
 1746: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1747: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1748: 	    break;
 1749: 	case XML_REGEXP_QUANT_RANGE:
 1750: #if DV_test
 1751: 	    if (atom->min == 0) {
 1752: 		xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1753: 	    }
 1754: #endif
 1755: 	    break;
 1756: 	default:
 1757: 	    break;
 1758:     }
 1759:     return(0);
 1760: }
 1761: 
 1762: /**
 1763:  * xmlFAReduceEpsilonTransitions:
 1764:  * @ctxt:  a regexp parser context
 1765:  * @fromnr:  the from state
 1766:  * @tonr:  the to state
 1767:  * @counter:  should that transition be associated to a counted
 1768:  *
 1769:  */
 1770: static void
 1771: xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
 1772: 	                      int tonr, int counter) {
 1773:     int transnr;
 1774:     xmlRegStatePtr from;
 1775:     xmlRegStatePtr to;
 1776: 
 1777: #ifdef DEBUG_REGEXP_GRAPH
 1778:     printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
 1779: #endif
 1780:     from = ctxt->states[fromnr];
 1781:     if (from == NULL)
 1782: 	return;
 1783:     to = ctxt->states[tonr];
 1784:     if (to == NULL)
 1785: 	return;
 1786:     if ((to->mark == XML_REGEXP_MARK_START) ||
 1787: 	(to->mark == XML_REGEXP_MARK_VISITED))
 1788: 	return;
 1789: 
 1790:     to->mark = XML_REGEXP_MARK_VISITED;
 1791:     if (to->type == XML_REGEXP_FINAL_STATE) {
 1792: #ifdef DEBUG_REGEXP_GRAPH
 1793: 	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
 1794: #endif
 1795: 	from->type = XML_REGEXP_FINAL_STATE;
 1796:     }
 1797:     for (transnr = 0;transnr < to->nbTrans;transnr++) {
 1798:         if (to->trans[transnr].to < 0)
 1799: 	    continue;
 1800: 	if (to->trans[transnr].atom == NULL) {
 1801: 	    /*
 1802: 	     * Don't remove counted transitions
 1803: 	     * Don't loop either
 1804: 	     */
 1805: 	    if (to->trans[transnr].to != fromnr) {
 1806: 		if (to->trans[transnr].count >= 0) {
 1807: 		    int newto = to->trans[transnr].to;
 1808: 
 1809: 		    xmlRegStateAddTrans(ctxt, from, NULL,
 1810: 					ctxt->states[newto],
 1811: 					-1, to->trans[transnr].count);
 1812: 		} else {
 1813: #ifdef DEBUG_REGEXP_GRAPH
 1814: 		    printf("Found epsilon trans %d from %d to %d\n",
 1815: 			   transnr, tonr, to->trans[transnr].to);
 1816: #endif
 1817: 		    if (to->trans[transnr].counter >= 0) {
 1818: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1819: 					      to->trans[transnr].to,
 1820: 					      to->trans[transnr].counter);
 1821: 		    } else {
 1822: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1823: 					      to->trans[transnr].to,
 1824: 					      counter);
 1825: 		    }
 1826: 		}
 1827: 	    }
 1828: 	} else {
 1829: 	    int newto = to->trans[transnr].to;
 1830: 
 1831: 	    if (to->trans[transnr].counter >= 0) {
 1832: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
 1833: 				    ctxt->states[newto],
 1834: 				    to->trans[transnr].counter, -1);
 1835: 	    } else {
 1836: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
 1837: 				    ctxt->states[newto], counter, -1);
 1838: 	    }
 1839: 	}
 1840:     }
 1841:     to->mark = XML_REGEXP_MARK_NORMAL;
 1842: }
 1843: 
 1844: /**
 1845:  * xmlFAEliminateSimpleEpsilonTransitions:
 1846:  * @ctxt:  a regexp parser context
 1847:  *
 1848:  * Eliminating general epsilon transitions can get costly in the general
 1849:  * algorithm due to the large amount of generated new transitions and
 1850:  * associated comparisons. However for simple epsilon transition used just
 1851:  * to separate building blocks when generating the automata this can be
 1852:  * reduced to state elimination:
 1853:  *    - if there exists an epsilon from X to Y
 1854:  *    - if there is no other transition from X
 1855:  * then X and Y are semantically equivalent and X can be eliminated
 1856:  * If X is the start state then make Y the start state, else replace the
 1857:  * target of all transitions to X by transitions to Y.
 1858:  */
 1859: static void
 1860: xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 1861:     int statenr, i, j, newto;
 1862:     xmlRegStatePtr state, tmp;
 1863: 
 1864:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1865: 	state = ctxt->states[statenr];
 1866: 	if (state == NULL)
 1867: 	    continue;
 1868: 	if (state->nbTrans != 1)
 1869: 	    continue;
 1870: 	if (state->type == XML_REGEXP_UNREACH_STATE)
 1871: 	    continue;
 1872: 	/* is the only transition out a basic transition */
 1873: 	if ((state->trans[0].atom == NULL) &&
 1874: 	    (state->trans[0].to >= 0) &&
 1875: 	    (state->trans[0].to != statenr) &&
 1876: 	    (state->trans[0].counter < 0) &&
 1877: 	    (state->trans[0].count < 0)) {
 1878: 	    newto = state->trans[0].to;
 1879: 
 1880:             if (state->type == XML_REGEXP_START_STATE) {
 1881: #ifdef DEBUG_REGEXP_GRAPH
 1882: 		printf("Found simple epsilon trans from start %d to %d\n",
 1883: 		       statenr, newto);
 1884: #endif
 1885:             } else {
 1886: #ifdef DEBUG_REGEXP_GRAPH
 1887: 		printf("Found simple epsilon trans from %d to %d\n",
 1888: 		       statenr, newto);
 1889: #endif
 1890: 	        for (i = 0;i < state->nbTransTo;i++) {
 1891: 		    tmp = ctxt->states[state->transTo[i]];
 1892: 		    for (j = 0;j < tmp->nbTrans;j++) {
 1893: 			if (tmp->trans[j].to == statenr) {
 1894: #ifdef DEBUG_REGEXP_GRAPH
 1895: 			    printf("Changed transition %d on %d to go to %d\n",
 1896: 				   j, tmp->no, newto);
 1897: #endif
 1898: 			    tmp->trans[j].to = -1;
 1899: 			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
 1900: 						ctxt->states[newto],
 1901: 					        tmp->trans[j].counter,
 1902: 						tmp->trans[j].count);
 1903: 			}
 1904: 		    }
 1905: 		}
 1906: 		if (state->type == XML_REGEXP_FINAL_STATE)
 1907: 		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
 1908: 		/* eliminate the transition completely */
 1909: 		state->nbTrans = 0;
 1910: 
 1911:                 state->type = XML_REGEXP_UNREACH_STATE;
 1912: 
 1913: 	    }
 1914: 
 1915: 	}
 1916:     }
 1917: }
 1918: /**
 1919:  * xmlFAEliminateEpsilonTransitions:
 1920:  * @ctxt:  a regexp parser context
 1921:  *
 1922:  */
 1923: static void
 1924: xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 1925:     int statenr, transnr;
 1926:     xmlRegStatePtr state;
 1927:     int has_epsilon;
 1928: 
 1929:     if (ctxt->states == NULL) return;
 1930: 
 1931:     /*
 1932:      * Eliminate simple epsilon transition and the associated unreachable
 1933:      * states.
 1934:      */
 1935:     xmlFAEliminateSimpleEpsilonTransitions(ctxt);
 1936:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1937: 	state = ctxt->states[statenr];
 1938: 	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
 1939: #ifdef DEBUG_REGEXP_GRAPH
 1940: 	    printf("Removed unreachable state %d\n", statenr);
 1941: #endif
 1942: 	    xmlRegFreeState(state);
 1943: 	    ctxt->states[statenr] = NULL;
 1944: 	}
 1945:     }
 1946: 
 1947:     has_epsilon = 0;
 1948: 
 1949:     /*
 1950:      * Build the completed transitions bypassing the epsilons
 1951:      * Use a marking algorithm to avoid loops
 1952:      * Mark sink states too.
 1953:      * Process from the latests states backward to the start when
 1954:      * there is long cascading epsilon chains this minimize the
 1955:      * recursions and transition compares when adding the new ones
 1956:      */
 1957:     for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
 1958: 	state = ctxt->states[statenr];
 1959: 	if (state == NULL)
 1960: 	    continue;
 1961: 	if ((state->nbTrans == 0) &&
 1962: 	    (state->type != XML_REGEXP_FINAL_STATE)) {
 1963: 	    state->type = XML_REGEXP_SINK_STATE;
 1964: 	}
 1965: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 1966: 	    if ((state->trans[transnr].atom == NULL) &&
 1967: 		(state->trans[transnr].to >= 0)) {
 1968: 		if (state->trans[transnr].to == statenr) {
 1969: 		    state->trans[transnr].to = -1;
 1970: #ifdef DEBUG_REGEXP_GRAPH
 1971: 		    printf("Removed loopback epsilon trans %d on %d\n",
 1972: 			   transnr, statenr);
 1973: #endif
 1974: 		} else if (state->trans[transnr].count < 0) {
 1975: 		    int newto = state->trans[transnr].to;
 1976: 
 1977: #ifdef DEBUG_REGEXP_GRAPH
 1978: 		    printf("Found epsilon trans %d from %d to %d\n",
 1979: 			   transnr, statenr, newto);
 1980: #endif
 1981: 		    has_epsilon = 1;
 1982: 		    state->trans[transnr].to = -2;
 1983: 		    state->mark = XML_REGEXP_MARK_START;
 1984: 		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
 1985: 				      newto, state->trans[transnr].counter);
 1986: 		    state->mark = XML_REGEXP_MARK_NORMAL;
 1987: #ifdef DEBUG_REGEXP_GRAPH
 1988: 		} else {
 1989: 		    printf("Found counted transition %d on %d\n",
 1990: 			   transnr, statenr);
 1991: #endif
 1992: 	        }
 1993: 	    }
 1994: 	}
 1995:     }
 1996:     /*
 1997:      * Eliminate the epsilon transitions
 1998:      */
 1999:     if (has_epsilon) {
 2000: 	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2001: 	    state = ctxt->states[statenr];
 2002: 	    if (state == NULL)
 2003: 		continue;
 2004: 	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2005: 		xmlRegTransPtr trans = &(state->trans[transnr]);
 2006: 		if ((trans->atom == NULL) &&
 2007: 		    (trans->count < 0) &&
 2008: 		    (trans->to >= 0)) {
 2009: 		    trans->to = -1;
 2010: 		}
 2011: 	    }
 2012: 	}
 2013:     }
 2014: 
 2015:     /*
 2016:      * Use this pass to detect unreachable states too
 2017:      */
 2018:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2019: 	state = ctxt->states[statenr];
 2020: 	if (state != NULL)
 2021: 	    state->reached = XML_REGEXP_MARK_NORMAL;
 2022:     }
 2023:     state = ctxt->states[0];
 2024:     if (state != NULL)
 2025: 	state->reached = XML_REGEXP_MARK_START;
 2026:     while (state != NULL) {
 2027: 	xmlRegStatePtr target = NULL;
 2028: 	state->reached = XML_REGEXP_MARK_VISITED;
 2029: 	/*
 2030: 	 * Mark all states reachable from the current reachable state
 2031: 	 */
 2032: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2033: 	    if ((state->trans[transnr].to >= 0) &&
 2034: 		((state->trans[transnr].atom != NULL) ||
 2035: 		 (state->trans[transnr].count >= 0))) {
 2036: 		int newto = state->trans[transnr].to;
 2037: 
 2038: 		if (ctxt->states[newto] == NULL)
 2039: 		    continue;
 2040: 		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
 2041: 		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
 2042: 		    target = ctxt->states[newto];
 2043: 		}
 2044: 	    }
 2045: 	}
 2046: 
 2047: 	/*
 2048: 	 * find the next accessible state not explored
 2049: 	 */
 2050: 	if (target == NULL) {
 2051: 	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
 2052: 		state = ctxt->states[statenr];
 2053: 		if ((state != NULL) && (state->reached ==
 2054: 			XML_REGEXP_MARK_START)) {
 2055: 		    target = state;
 2056: 		    break;
 2057: 		}
 2058: 	    }
 2059: 	}
 2060: 	state = target;
 2061:     }
 2062:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2063: 	state = ctxt->states[statenr];
 2064: 	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
 2065: #ifdef DEBUG_REGEXP_GRAPH
 2066: 	    printf("Removed unreachable state %d\n", statenr);
 2067: #endif
 2068: 	    xmlRegFreeState(state);
 2069: 	    ctxt->states[statenr] = NULL;
 2070: 	}
 2071:     }
 2072: 
 2073: }
 2074: 
 2075: static int
 2076: xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
 2077:     int ret = 0;
 2078: 
 2079:     if ((range1->type == XML_REGEXP_RANGES) ||
 2080:         (range2->type == XML_REGEXP_RANGES) ||
 2081:         (range2->type == XML_REGEXP_SUBREG) ||
 2082:         (range1->type == XML_REGEXP_SUBREG) ||
 2083:         (range1->type == XML_REGEXP_STRING) ||
 2084:         (range2->type == XML_REGEXP_STRING))
 2085: 	return(-1);
 2086: 
 2087:     /* put them in order */
 2088:     if (range1->type > range2->type) {
 2089:         xmlRegRangePtr tmp;
 2090: 
 2091: 	tmp = range1;
 2092: 	range1 = range2;
 2093: 	range2 = tmp;
 2094:     }
 2095:     if ((range1->type == XML_REGEXP_ANYCHAR) ||
 2096:         (range2->type == XML_REGEXP_ANYCHAR)) {
 2097: 	ret = 1;
 2098:     } else if ((range1->type == XML_REGEXP_EPSILON) ||
 2099:                (range2->type == XML_REGEXP_EPSILON)) {
 2100: 	return(0);
 2101:     } else if (range1->type == range2->type) {
 2102:         if (range1->type != XML_REGEXP_CHARVAL)
 2103:             ret = 1;
 2104:         else if ((range1->end < range2->start) ||
 2105: 	         (range2->end < range1->start))
 2106: 	    ret = 0;
 2107: 	else
 2108: 	    ret = 1;
 2109:     } else if (range1->type == XML_REGEXP_CHARVAL) {
 2110:         int codepoint;
 2111: 	int neg = 0;
 2112: 
 2113: 	/*
 2114: 	 * just check all codepoints in the range for acceptance,
 2115: 	 * this is usually way cheaper since done only once at
 2116: 	 * compilation than testing over and over at runtime or
 2117: 	 * pushing too many states when evaluating.
 2118: 	 */
 2119: 	if (((range1->neg == 0) && (range2->neg != 0)) ||
 2120: 	    ((range1->neg != 0) && (range2->neg == 0)))
 2121: 	    neg = 1;
 2122: 
 2123: 	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
 2124: 	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
 2125: 					    0, range2->start, range2->end,
 2126: 					    range2->blockName);
 2127: 	    if (ret < 0)
 2128: 	        return(-1);
 2129: 	    if (((neg == 1) && (ret == 0)) ||
 2130: 	        ((neg == 0) && (ret == 1)))
 2131: 		return(1);
 2132: 	}
 2133: 	return(0);
 2134:     } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
 2135:                (range2->type == XML_REGEXP_BLOCK_NAME)) {
 2136: 	if (range1->type == range2->type) {
 2137: 	    ret = xmlStrEqual(range1->blockName, range2->blockName);
 2138: 	} else {
 2139: 	    /*
 2140: 	     * comparing a block range with anything else is way
 2141: 	     * too costly, and maintining the table is like too much
 2142: 	     * memory too, so let's force the automata to save state
 2143: 	     * here.
 2144: 	     */
 2145: 	    return(1);
 2146: 	}
 2147:     } else if ((range1->type < XML_REGEXP_LETTER) ||
 2148:                (range2->type < XML_REGEXP_LETTER)) {
 2149: 	if ((range1->type == XML_REGEXP_ANYSPACE) &&
 2150: 	    (range2->type == XML_REGEXP_NOTSPACE))
 2151: 	    ret = 0;
 2152: 	else if ((range1->type == XML_REGEXP_INITNAME) &&
 2153: 	         (range2->type == XML_REGEXP_NOTINITNAME))
 2154: 	    ret = 0;
 2155: 	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
 2156: 	         (range2->type == XML_REGEXP_NOTNAMECHAR))
 2157: 	    ret = 0;
 2158: 	else if ((range1->type == XML_REGEXP_DECIMAL) &&
 2159: 	         (range2->type == XML_REGEXP_NOTDECIMAL))
 2160: 	    ret = 0;
 2161: 	else if ((range1->type == XML_REGEXP_REALCHAR) &&
 2162: 	         (range2->type == XML_REGEXP_NOTREALCHAR))
 2163: 	    ret = 0;
 2164: 	else {
 2165: 	    /* same thing to limit complexity */
 2166: 	    return(1);
 2167: 	}
 2168:     } else {
 2169:         ret = 0;
 2170:         /* range1->type < range2->type here */
 2171:         switch (range1->type) {
 2172: 	    case XML_REGEXP_LETTER:
 2173: 	         /* all disjoint except in the subgroups */
 2174: 	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
 2175: 		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
 2176: 		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
 2177: 		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
 2178: 		     (range2->type == XML_REGEXP_LETTER_OTHERS))
 2179: 		     ret = 1;
 2180: 		 break;
 2181: 	    case XML_REGEXP_MARK:
 2182: 	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
 2183: 		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
 2184: 		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
 2185: 		     ret = 1;
 2186: 		 break;
 2187: 	    case XML_REGEXP_NUMBER:
 2188: 	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
 2189: 		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
 2190: 		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
 2191: 		     ret = 1;
 2192: 		 break;
 2193: 	    case XML_REGEXP_PUNCT:
 2194: 	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
 2195: 		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
 2196: 		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
 2197: 		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
 2198: 		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
 2199: 		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
 2200: 		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
 2201: 		     ret = 1;
 2202: 		 break;
 2203: 	    case XML_REGEXP_SEPAR:
 2204: 	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
 2205: 		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
 2206: 		     (range2->type == XML_REGEXP_SEPAR_PARA))
 2207: 		     ret = 1;
 2208: 		 break;
 2209: 	    case XML_REGEXP_SYMBOL:
 2210: 	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
 2211: 		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
 2212: 		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
 2213: 		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
 2214: 		     ret = 1;
 2215: 		 break;
 2216: 	    case XML_REGEXP_OTHER:
 2217: 	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
 2218: 		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
 2219: 		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
 2220: 		     ret = 1;
 2221: 		 break;
 2222:             default:
 2223: 	         if ((range2->type >= XML_REGEXP_LETTER) &&
 2224: 		     (range2->type < XML_REGEXP_BLOCK_NAME))
 2225: 		     ret = 0;
 2226: 		 else {
 2227: 		     /* safety net ! */
 2228: 		     return(1);
 2229: 		 }
 2230: 	}
 2231:     }
 2232:     if (((range1->neg == 0) && (range2->neg != 0)) ||
 2233:         ((range1->neg != 0) && (range2->neg == 0)))
 2234: 	ret = !ret;
 2235:     return(ret);
 2236: }
 2237: 
 2238: /**
 2239:  * xmlFACompareAtomTypes:
 2240:  * @type1:  an atom type
 2241:  * @type2:  an atom type
 2242:  *
 2243:  * Compares two atoms type to check whether they intersect in some ways,
 2244:  * this is used by xmlFACompareAtoms only
 2245:  *
 2246:  * Returns 1 if they may intersect and 0 otherwise
 2247:  */
 2248: static int
 2249: xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
 2250:     if ((type1 == XML_REGEXP_EPSILON) ||
 2251:         (type1 == XML_REGEXP_CHARVAL) ||
 2252: 	(type1 == XML_REGEXP_RANGES) ||
 2253: 	(type1 == XML_REGEXP_SUBREG) ||
 2254: 	(type1 == XML_REGEXP_STRING) ||
 2255: 	(type1 == XML_REGEXP_ANYCHAR))
 2256: 	return(1);
 2257:     if ((type2 == XML_REGEXP_EPSILON) ||
 2258:         (type2 == XML_REGEXP_CHARVAL) ||
 2259: 	(type2 == XML_REGEXP_RANGES) ||
 2260: 	(type2 == XML_REGEXP_SUBREG) ||
 2261: 	(type2 == XML_REGEXP_STRING) ||
 2262: 	(type2 == XML_REGEXP_ANYCHAR))
 2263: 	return(1);
 2264: 
 2265:     if (type1 == type2) return(1);
 2266: 
 2267:     /* simplify subsequent compares by making sure type1 < type2 */
 2268:     if (type1 > type2) {
 2269:         xmlRegAtomType tmp = type1;
 2270: 	type1 = type2;
 2271: 	type2 = tmp;
 2272:     }
 2273:     switch (type1) {
 2274:         case XML_REGEXP_ANYSPACE: /* \s */
 2275: 	    /* can't be a letter, number, mark, pontuation, symbol */
 2276: 	    if ((type2 == XML_REGEXP_NOTSPACE) ||
 2277: 		((type2 >= XML_REGEXP_LETTER) &&
 2278: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2279: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2280: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2281: 	        ((type2 >= XML_REGEXP_MARK) &&
 2282: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2283: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2284: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2285: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2286: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
 2287: 	        ) return(0);
 2288: 	    break;
 2289:         case XML_REGEXP_NOTSPACE: /* \S */
 2290: 	    break;
 2291:         case XML_REGEXP_INITNAME: /* \l */
 2292: 	    /* can't be a number, mark, separator, pontuation, symbol or other */
 2293: 	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
 2294: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2295: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2296: 	        ((type2 >= XML_REGEXP_MARK) &&
 2297: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2298: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2299: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2300: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2301: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2302: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2303: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2304: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2305: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2306: 		) return(0);
 2307: 	    break;
 2308:         case XML_REGEXP_NOTINITNAME: /* \L */
 2309: 	    break;
 2310:         case XML_REGEXP_NAMECHAR: /* \c */
 2311: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2312: 	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
 2313: 	        ((type2 >= XML_REGEXP_MARK) &&
 2314: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2315: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2316: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2317: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2318: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2319: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2320: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2321: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2322: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2323: 		) return(0);
 2324: 	    break;
 2325:         case XML_REGEXP_NOTNAMECHAR: /* \C */
 2326: 	    break;
 2327:         case XML_REGEXP_DECIMAL: /* \d */
 2328: 	    /* can't be a letter, mark, separator, pontuation, symbol or other */
 2329: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2330: 	        (type2 == XML_REGEXP_REALCHAR) ||
 2331: 		((type2 >= XML_REGEXP_LETTER) &&
 2332: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2333: 	        ((type2 >= XML_REGEXP_MARK) &&
 2334: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2335: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2336: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2337: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2338: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2339: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2340: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2341: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2342: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2343: 		)return(0);
 2344: 	    break;
 2345:         case XML_REGEXP_NOTDECIMAL: /* \D */
 2346: 	    break;
 2347:         case XML_REGEXP_REALCHAR: /* \w */
 2348: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2349: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2350: 	        ((type2 >= XML_REGEXP_MARK) &&
 2351: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2352: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2353: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2354: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2355: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2356: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2357: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2358: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2359: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2360: 		)return(0);
 2361: 	    break;
 2362:         case XML_REGEXP_NOTREALCHAR: /* \W */
 2363: 	    break;
 2364: 	/*
 2365: 	 * at that point we know both type 1 and type2 are from
 2366: 	 * character categories are ordered and are different,
 2367: 	 * it becomes simple because this is a partition
 2368: 	 */
 2369:         case XML_REGEXP_LETTER:
 2370: 	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
 2371: 	        return(1);
 2372: 	    return(0);
 2373:         case XML_REGEXP_LETTER_UPPERCASE:
 2374:         case XML_REGEXP_LETTER_LOWERCASE:
 2375:         case XML_REGEXP_LETTER_TITLECASE:
 2376:         case XML_REGEXP_LETTER_MODIFIER:
 2377:         case XML_REGEXP_LETTER_OTHERS:
 2378: 	    return(0);
 2379:         case XML_REGEXP_MARK:
 2380: 	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
 2381: 	        return(1);
 2382: 	    return(0);
 2383:         case XML_REGEXP_MARK_NONSPACING:
 2384:         case XML_REGEXP_MARK_SPACECOMBINING:
 2385:         case XML_REGEXP_MARK_ENCLOSING:
 2386: 	    return(0);
 2387:         case XML_REGEXP_NUMBER:
 2388: 	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
 2389: 	        return(1);
 2390: 	    return(0);
 2391:         case XML_REGEXP_NUMBER_DECIMAL:
 2392:         case XML_REGEXP_NUMBER_LETTER:
 2393:         case XML_REGEXP_NUMBER_OTHERS:
 2394: 	    return(0);
 2395:         case XML_REGEXP_PUNCT:
 2396: 	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
 2397: 	        return(1);
 2398: 	    return(0);
 2399:         case XML_REGEXP_PUNCT_CONNECTOR:
 2400:         case XML_REGEXP_PUNCT_DASH:
 2401:         case XML_REGEXP_PUNCT_OPEN:
 2402:         case XML_REGEXP_PUNCT_CLOSE:
 2403:         case XML_REGEXP_PUNCT_INITQUOTE:
 2404:         case XML_REGEXP_PUNCT_FINQUOTE:
 2405:         case XML_REGEXP_PUNCT_OTHERS:
 2406: 	    return(0);
 2407:         case XML_REGEXP_SEPAR:
 2408: 	    if (type2 <= XML_REGEXP_SEPAR_PARA)
 2409: 	        return(1);
 2410: 	    return(0);
 2411:         case XML_REGEXP_SEPAR_SPACE:
 2412:         case XML_REGEXP_SEPAR_LINE:
 2413:         case XML_REGEXP_SEPAR_PARA:
 2414: 	    return(0);
 2415:         case XML_REGEXP_SYMBOL:
 2416: 	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
 2417: 	        return(1);
 2418: 	    return(0);
 2419:         case XML_REGEXP_SYMBOL_MATH:
 2420:         case XML_REGEXP_SYMBOL_CURRENCY:
 2421:         case XML_REGEXP_SYMBOL_MODIFIER:
 2422:         case XML_REGEXP_SYMBOL_OTHERS:
 2423: 	    return(0);
 2424:         case XML_REGEXP_OTHER:
 2425: 	    if (type2 <= XML_REGEXP_OTHER_NA)
 2426: 	        return(1);
 2427: 	    return(0);
 2428:         case XML_REGEXP_OTHER_CONTROL:
 2429:         case XML_REGEXP_OTHER_FORMAT:
 2430:         case XML_REGEXP_OTHER_PRIVATE:
 2431:         case XML_REGEXP_OTHER_NA:
 2432: 	    return(0);
 2433: 	default:
 2434: 	    break;
 2435:     }
 2436:     return(1);
 2437: }
 2438: 
 2439: /**
 2440:  * xmlFAEqualAtoms:
 2441:  * @atom1:  an atom
 2442:  * @atom2:  an atom
 2443:  * @deep: if not set only compare string pointers
 2444:  *
 2445:  * Compares two atoms to check whether they are the same exactly
 2446:  * this is used to remove equivalent transitions
 2447:  *
 2448:  * Returns 1 if same and 0 otherwise
 2449:  */
 2450: static int
 2451: xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2452:     int ret = 0;
 2453: 
 2454:     if (atom1 == atom2)
 2455: 	return(1);
 2456:     if ((atom1 == NULL) || (atom2 == NULL))
 2457: 	return(0);
 2458: 
 2459:     if (atom1->type != atom2->type)
 2460:         return(0);
 2461:     switch (atom1->type) {
 2462:         case XML_REGEXP_EPSILON:
 2463: 	    ret = 0;
 2464: 	    break;
 2465:         case XML_REGEXP_STRING:
 2466:             if (!deep)
 2467:                 ret = (atom1->valuep == atom2->valuep);
 2468:             else
 2469:                 ret = xmlStrEqual((xmlChar *)atom1->valuep,
 2470:                                   (xmlChar *)atom2->valuep);
 2471: 	    break;
 2472:         case XML_REGEXP_CHARVAL:
 2473: 	    ret = (atom1->codepoint == atom2->codepoint);
 2474: 	    break;
 2475: 	case XML_REGEXP_RANGES:
 2476: 	    /* too hard to do in the general case */
 2477: 	    ret = 0;
 2478: 	default:
 2479: 	    break;
 2480:     }
 2481:     return(ret);
 2482: }
 2483: 
 2484: /**
 2485:  * xmlFACompareAtoms:
 2486:  * @atom1:  an atom
 2487:  * @atom2:  an atom
 2488:  * @deep: if not set only compare string pointers
 2489:  *
 2490:  * Compares two atoms to check whether they intersect in some ways,
 2491:  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
 2492:  *
 2493:  * Returns 1 if yes and 0 otherwise
 2494:  */
 2495: static int
 2496: xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2497:     int ret = 1;
 2498: 
 2499:     if (atom1 == atom2)
 2500: 	return(1);
 2501:     if ((atom1 == NULL) || (atom2 == NULL))
 2502: 	return(0);
 2503: 
 2504:     if ((atom1->type == XML_REGEXP_ANYCHAR) ||
 2505:         (atom2->type == XML_REGEXP_ANYCHAR))
 2506: 	return(1);
 2507: 
 2508:     if (atom1->type > atom2->type) {
 2509: 	xmlRegAtomPtr tmp;
 2510: 	tmp = atom1;
 2511: 	atom1 = atom2;
 2512: 	atom2 = tmp;
 2513:     }
 2514:     if (atom1->type != atom2->type) {
 2515:         ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
 2516: 	/* if they can't intersect at the type level break now */
 2517: 	if (ret == 0)
 2518: 	    return(0);
 2519:     }
 2520:     switch (atom1->type) {
 2521:         case XML_REGEXP_STRING:
 2522:             if (!deep)
 2523:                 ret = (atom1->valuep != atom2->valuep);
 2524:             else
 2525:                 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
 2526:                                              (xmlChar *)atom2->valuep);
 2527: 	    break;
 2528:         case XML_REGEXP_EPSILON:
 2529: 	    goto not_determinist;
 2530:         case XML_REGEXP_CHARVAL:
 2531: 	    if (atom2->type == XML_REGEXP_CHARVAL) {
 2532: 		ret = (atom1->codepoint == atom2->codepoint);
 2533: 	    } else {
 2534: 	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
 2535: 		if (ret < 0)
 2536: 		    ret = 1;
 2537: 	    }
 2538: 	    break;
 2539:         case XML_REGEXP_RANGES:
 2540: 	    if (atom2->type == XML_REGEXP_RANGES) {
 2541: 	        int i, j, res;
 2542: 		xmlRegRangePtr r1, r2;
 2543: 
 2544: 		/*
 2545: 		 * need to check that none of the ranges eventually matches
 2546: 		 */
 2547: 		for (i = 0;i < atom1->nbRanges;i++) {
 2548: 		    for (j = 0;j < atom2->nbRanges;j++) {
 2549: 			r1 = atom1->ranges[i];
 2550: 			r2 = atom2->ranges[j];
 2551: 			res = xmlFACompareRanges(r1, r2);
 2552: 			if (res == 1) {
 2553: 			    ret = 1;
 2554: 			    goto done;
 2555: 			}
 2556: 		    }
 2557: 		}
 2558: 		ret = 0;
 2559: 	    }
 2560: 	    break;
 2561: 	default:
 2562: 	    goto not_determinist;
 2563:     }
 2564: done:
 2565:     if (atom1->neg != atom2->neg) {
 2566:         ret = !ret;
 2567:     }
 2568:     if (ret == 0)
 2569:         return(0);
 2570: not_determinist:
 2571:     return(1);
 2572: }
 2573: 
 2574: /**
 2575:  * xmlFARecurseDeterminism:
 2576:  * @ctxt:  a regexp parser context
 2577:  *
 2578:  * Check whether the associated regexp is determinist,
 2579:  * should be called after xmlFAEliminateEpsilonTransitions()
 2580:  *
 2581:  */
 2582: static int
 2583: xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 2584: 	                 int to, xmlRegAtomPtr atom) {
 2585:     int ret = 1;
 2586:     int res;
 2587:     int transnr, nbTrans;
 2588:     xmlRegTransPtr t1;
 2589:     int deep = 1;
 2590: 
 2591:     if (state == NULL)
 2592: 	return(ret);
 2593:     if (state->markd == XML_REGEXP_MARK_VISITED)
 2594: 	return(ret);
 2595: 
 2596:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2597:         deep = 0;
 2598: 
 2599:     /*
 2600:      * don't recurse on transitions potentially added in the course of
 2601:      * the elimination.
 2602:      */
 2603:     nbTrans = state->nbTrans;
 2604:     for (transnr = 0;transnr < nbTrans;transnr++) {
 2605: 	t1 = &(state->trans[transnr]);
 2606: 	/*
 2607: 	 * check transitions conflicting with the one looked at
 2608: 	 */
 2609: 	if (t1->atom == NULL) {
 2610: 	    if (t1->to < 0)
 2611: 		continue;
 2612: 	    state->markd = XML_REGEXP_MARK_VISITED;
 2613: 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2614: 		                           to, atom);
 2615: 	    state->markd = 0;
 2616: 	    if (res == 0) {
 2617: 	        ret = 0;
 2618: 		/* t1->nd = 1; */
 2619: 	    }
 2620: 	    continue;
 2621: 	}
 2622: 	if (t1->to != to)
 2623: 	    continue;
 2624: 	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
 2625: 	    ret = 0;
 2626: 	    /* mark the transition as non-deterministic */
 2627: 	    t1->nd = 1;
 2628: 	}
 2629:     }
 2630:     return(ret);
 2631: }
 2632: 
 2633: /**
 2634:  * xmlFAComputesDeterminism:
 2635:  * @ctxt:  a regexp parser context
 2636:  *
 2637:  * Check whether the associated regexp is determinist,
 2638:  * should be called after xmlFAEliminateEpsilonTransitions()
 2639:  *
 2640:  */
 2641: static int
 2642: xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 2643:     int statenr, transnr;
 2644:     xmlRegStatePtr state;
 2645:     xmlRegTransPtr t1, t2, last;
 2646:     int i;
 2647:     int ret = 1;
 2648:     int deep = 1;
 2649: 
 2650: #ifdef DEBUG_REGEXP_GRAPH
 2651:     printf("xmlFAComputesDeterminism\n");
 2652:     xmlRegPrintCtxt(stdout, ctxt);
 2653: #endif
 2654:     if (ctxt->determinist != -1)
 2655: 	return(ctxt->determinist);
 2656: 
 2657:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2658:         deep = 0;
 2659: 
 2660:     /*
 2661:      * First cleanup the automata removing cancelled transitions
 2662:      */
 2663:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2664: 	state = ctxt->states[statenr];
 2665: 	if (state == NULL)
 2666: 	    continue;
 2667: 	if (state->nbTrans < 2)
 2668: 	    continue;
 2669: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2670: 	    t1 = &(state->trans[transnr]);
 2671: 	    /*
 2672: 	     * Determinism checks in case of counted or all transitions
 2673: 	     * will have to be handled separately
 2674: 	     */
 2675: 	    if (t1->atom == NULL) {
 2676: 		/* t1->nd = 1; */
 2677: 		continue;
 2678: 	    }
 2679: 	    if (t1->to == -1) /* eliminated */
 2680: 		continue;
 2681: 	    for (i = 0;i < transnr;i++) {
 2682: 		t2 = &(state->trans[i]);
 2683: 		if (t2->to == -1) /* eliminated */
 2684: 		    continue;
 2685: 		if (t2->atom != NULL) {
 2686: 		    if (t1->to == t2->to) {
 2687:                         /*
 2688:                          * Here we use deep because we want to keep the
 2689:                          * transitions which indicate a conflict
 2690:                          */
 2691: 			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
 2692:                             (t1->counter == t2->counter) &&
 2693:                             (t1->count == t2->count))
 2694: 			    t2->to = -1; /* eliminated */
 2695: 		    }
 2696: 		}
 2697: 	    }
 2698: 	}
 2699:     }
 2700: 
 2701:     /*
 2702:      * Check for all states that there aren't 2 transitions
 2703:      * with the same atom and a different target.
 2704:      */
 2705:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2706: 	state = ctxt->states[statenr];
 2707: 	if (state == NULL)
 2708: 	    continue;
 2709: 	if (state->nbTrans < 2)
 2710: 	    continue;
 2711: 	last = NULL;
 2712: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2713: 	    t1 = &(state->trans[transnr]);
 2714: 	    /*
 2715: 	     * Determinism checks in case of counted or all transitions
 2716: 	     * will have to be handled separately
 2717: 	     */
 2718: 	    if (t1->atom == NULL) {
 2719: 		continue;
 2720: 	    }
 2721: 	    if (t1->to == -1) /* eliminated */
 2722: 		continue;
 2723: 	    for (i = 0;i < transnr;i++) {
 2724: 		t2 = &(state->trans[i]);
 2725: 		if (t2->to == -1) /* eliminated */
 2726: 		    continue;
 2727: 		if (t2->atom != NULL) {
 2728:                     /*
 2729:                      * But here we don't use deep because we want to
 2730:                      * find transitions which indicate a conflict
 2731:                      */
 2732: 		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
 2733: 			ret = 0;
 2734: 			/* mark the transitions as non-deterministic ones */
 2735: 			t1->nd = 1;
 2736: 			t2->nd = 1;
 2737: 			last = t1;
 2738: 		    }
 2739: 		} else if (t1->to != -1) {
 2740: 		    /*
 2741: 		     * do the closure in case of remaining specific
 2742: 		     * epsilon transitions like choices or all
 2743: 		     */
 2744: 		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2745: 						   t2->to, t2->atom);
 2746: 		    /* don't shortcut the computation so all non deterministic
 2747: 		       transition get marked down
 2748: 		    if (ret == 0)
 2749: 			return(0);
 2750: 		     */
 2751: 		    if (ret == 0) {
 2752: 			t1->nd = 1;
 2753: 			/* t2->nd = 1; */
 2754: 			last = t1;
 2755: 		    }
 2756: 		}
 2757: 	    }
 2758: 	    /* don't shortcut the computation so all non deterministic
 2759: 	       transition get marked down
 2760: 	    if (ret == 0)
 2761: 		break; */
 2762: 	}
 2763: 
 2764: 	/*
 2765: 	 * mark specifically the last non-deterministic transition
 2766: 	 * from a state since there is no need to set-up rollback
 2767: 	 * from it
 2768: 	 */
 2769: 	if (last != NULL) {
 2770: 	    last->nd = 2;
 2771: 	}
 2772: 
 2773: 	/* don't shortcut the computation so all non deterministic
 2774: 	   transition get marked down
 2775: 	if (ret == 0)
 2776: 	    break; */
 2777:     }
 2778: 
 2779:     ctxt->determinist = ret;
 2780:     return(ret);
 2781: }
 2782: 
 2783: /************************************************************************
 2784:  *									*
 2785:  *	Routines to check input against transition atoms		*
 2786:  *									*
 2787:  ************************************************************************/
 2788: 
 2789: static int
 2790: xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
 2791: 	                  int start, int end, const xmlChar *blockName) {
 2792:     int ret = 0;
 2793: 
 2794:     switch (type) {
 2795:         case XML_REGEXP_STRING:
 2796:         case XML_REGEXP_SUBREG:
 2797:         case XML_REGEXP_RANGES:
 2798:         case XML_REGEXP_EPSILON:
 2799: 	    return(-1);
 2800:         case XML_REGEXP_ANYCHAR:
 2801: 	    ret = ((codepoint != '\n') && (codepoint != '\r'));
 2802: 	    break;
 2803:         case XML_REGEXP_CHARVAL:
 2804: 	    ret = ((codepoint >= start) && (codepoint <= end));
 2805: 	    break;
 2806:         case XML_REGEXP_NOTSPACE:
 2807: 	    neg = !neg;
 2808:         case XML_REGEXP_ANYSPACE:
 2809: 	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
 2810: 		   (codepoint == '\t') || (codepoint == ' '));
 2811: 	    break;
 2812:         case XML_REGEXP_NOTINITNAME:
 2813: 	    neg = !neg;
 2814:         case XML_REGEXP_INITNAME:
 2815: 	    ret = (IS_LETTER(codepoint) ||
 2816: 		   (codepoint == '_') || (codepoint == ':'));
 2817: 	    break;
 2818:         case XML_REGEXP_NOTNAMECHAR:
 2819: 	    neg = !neg;
 2820:         case XML_REGEXP_NAMECHAR:
 2821: 	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
 2822: 		   (codepoint == '.') || (codepoint == '-') ||
 2823: 		   (codepoint == '_') || (codepoint == ':') ||
 2824: 		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
 2825: 	    break;
 2826:         case XML_REGEXP_NOTDECIMAL:
 2827: 	    neg = !neg;
 2828:         case XML_REGEXP_DECIMAL:
 2829: 	    ret = xmlUCSIsCatNd(codepoint);
 2830: 	    break;
 2831:         case XML_REGEXP_REALCHAR:
 2832: 	    neg = !neg;
 2833:         case XML_REGEXP_NOTREALCHAR:
 2834: 	    ret = xmlUCSIsCatP(codepoint);
 2835: 	    if (ret == 0)
 2836: 		ret = xmlUCSIsCatZ(codepoint);
 2837: 	    if (ret == 0)
 2838: 		ret = xmlUCSIsCatC(codepoint);
 2839: 	    break;
 2840:         case XML_REGEXP_LETTER:
 2841: 	    ret = xmlUCSIsCatL(codepoint);
 2842: 	    break;
 2843:         case XML_REGEXP_LETTER_UPPERCASE:
 2844: 	    ret = xmlUCSIsCatLu(codepoint);
 2845: 	    break;
 2846:         case XML_REGEXP_LETTER_LOWERCASE:
 2847: 	    ret = xmlUCSIsCatLl(codepoint);
 2848: 	    break;
 2849:         case XML_REGEXP_LETTER_TITLECASE:
 2850: 	    ret = xmlUCSIsCatLt(codepoint);
 2851: 	    break;
 2852:         case XML_REGEXP_LETTER_MODIFIER:
 2853: 	    ret = xmlUCSIsCatLm(codepoint);
 2854: 	    break;
 2855:         case XML_REGEXP_LETTER_OTHERS:
 2856: 	    ret = xmlUCSIsCatLo(codepoint);
 2857: 	    break;
 2858:         case XML_REGEXP_MARK:
 2859: 	    ret = xmlUCSIsCatM(codepoint);
 2860: 	    break;
 2861:         case XML_REGEXP_MARK_NONSPACING:
 2862: 	    ret = xmlUCSIsCatMn(codepoint);
 2863: 	    break;
 2864:         case XML_REGEXP_MARK_SPACECOMBINING:
 2865: 	    ret = xmlUCSIsCatMc(codepoint);
 2866: 	    break;
 2867:         case XML_REGEXP_MARK_ENCLOSING:
 2868: 	    ret = xmlUCSIsCatMe(codepoint);
 2869: 	    break;
 2870:         case XML_REGEXP_NUMBER:
 2871: 	    ret = xmlUCSIsCatN(codepoint);
 2872: 	    break;
 2873:         case XML_REGEXP_NUMBER_DECIMAL:
 2874: 	    ret = xmlUCSIsCatNd(codepoint);
 2875: 	    break;
 2876:         case XML_REGEXP_NUMBER_LETTER:
 2877: 	    ret = xmlUCSIsCatNl(codepoint);
 2878: 	    break;
 2879:         case XML_REGEXP_NUMBER_OTHERS:
 2880: 	    ret = xmlUCSIsCatNo(codepoint);
 2881: 	    break;
 2882:         case XML_REGEXP_PUNCT:
 2883: 	    ret = xmlUCSIsCatP(codepoint);
 2884: 	    break;
 2885:         case XML_REGEXP_PUNCT_CONNECTOR:
 2886: 	    ret = xmlUCSIsCatPc(codepoint);
 2887: 	    break;
 2888:         case XML_REGEXP_PUNCT_DASH:
 2889: 	    ret = xmlUCSIsCatPd(codepoint);
 2890: 	    break;
 2891:         case XML_REGEXP_PUNCT_OPEN:
 2892: 	    ret = xmlUCSIsCatPs(codepoint);
 2893: 	    break;
 2894:         case XML_REGEXP_PUNCT_CLOSE:
 2895: 	    ret = xmlUCSIsCatPe(codepoint);
 2896: 	    break;
 2897:         case XML_REGEXP_PUNCT_INITQUOTE:
 2898: 	    ret = xmlUCSIsCatPi(codepoint);
 2899: 	    break;
 2900:         case XML_REGEXP_PUNCT_FINQUOTE:
 2901: 	    ret = xmlUCSIsCatPf(codepoint);
 2902: 	    break;
 2903:         case XML_REGEXP_PUNCT_OTHERS:
 2904: 	    ret = xmlUCSIsCatPo(codepoint);
 2905: 	    break;
 2906:         case XML_REGEXP_SEPAR:
 2907: 	    ret = xmlUCSIsCatZ(codepoint);
 2908: 	    break;
 2909:         case XML_REGEXP_SEPAR_SPACE:
 2910: 	    ret = xmlUCSIsCatZs(codepoint);
 2911: 	    break;
 2912:         case XML_REGEXP_SEPAR_LINE:
 2913: 	    ret = xmlUCSIsCatZl(codepoint);
 2914: 	    break;
 2915:         case XML_REGEXP_SEPAR_PARA:
 2916: 	    ret = xmlUCSIsCatZp(codepoint);
 2917: 	    break;
 2918:         case XML_REGEXP_SYMBOL:
 2919: 	    ret = xmlUCSIsCatS(codepoint);
 2920: 	    break;
 2921:         case XML_REGEXP_SYMBOL_MATH:
 2922: 	    ret = xmlUCSIsCatSm(codepoint);
 2923: 	    break;
 2924:         case XML_REGEXP_SYMBOL_CURRENCY:
 2925: 	    ret = xmlUCSIsCatSc(codepoint);
 2926: 	    break;
 2927:         case XML_REGEXP_SYMBOL_MODIFIER:
 2928: 	    ret = xmlUCSIsCatSk(codepoint);
 2929: 	    break;
 2930:         case XML_REGEXP_SYMBOL_OTHERS:
 2931: 	    ret = xmlUCSIsCatSo(codepoint);
 2932: 	    break;
 2933:         case XML_REGEXP_OTHER:
 2934: 	    ret = xmlUCSIsCatC(codepoint);
 2935: 	    break;
 2936:         case XML_REGEXP_OTHER_CONTROL:
 2937: 	    ret = xmlUCSIsCatCc(codepoint);
 2938: 	    break;
 2939:         case XML_REGEXP_OTHER_FORMAT:
 2940: 	    ret = xmlUCSIsCatCf(codepoint);
 2941: 	    break;
 2942:         case XML_REGEXP_OTHER_PRIVATE:
 2943: 	    ret = xmlUCSIsCatCo(codepoint);
 2944: 	    break;
 2945:         case XML_REGEXP_OTHER_NA:
 2946: 	    /* ret = xmlUCSIsCatCn(codepoint); */
 2947: 	    /* Seems it doesn't exist anymore in recent Unicode releases */
 2948: 	    ret = 0;
 2949: 	    break;
 2950:         case XML_REGEXP_BLOCK_NAME:
 2951: 	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
 2952: 	    break;
 2953:     }
 2954:     if (neg)
 2955: 	return(!ret);
 2956:     return(ret);
 2957: }
 2958: 
 2959: static int
 2960: xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
 2961:     int i, ret = 0;
 2962:     xmlRegRangePtr range;
 2963: 
 2964:     if ((atom == NULL) || (!IS_CHAR(codepoint)))
 2965: 	return(-1);
 2966: 
 2967:     switch (atom->type) {
 2968:         case XML_REGEXP_SUBREG:
 2969:         case XML_REGEXP_EPSILON:
 2970: 	    return(-1);
 2971:         case XML_REGEXP_CHARVAL:
 2972:             return(codepoint == atom->codepoint);
 2973:         case XML_REGEXP_RANGES: {
 2974: 	    int accept = 0;
 2975: 
 2976: 	    for (i = 0;i < atom->nbRanges;i++) {
 2977: 		range = atom->ranges[i];
 2978: 		if (range->neg == 2) {
 2979: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2980: 						0, range->start, range->end,
 2981: 						range->blockName);
 2982: 		    if (ret != 0)
 2983: 			return(0); /* excluded char */
 2984: 		} else if (range->neg) {
 2985: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2986: 						0, range->start, range->end,
 2987: 						range->blockName);
 2988: 		    if (ret == 0)
 2989: 		        accept = 1;
 2990: 		    else
 2991: 		        return(0);
 2992: 		} else {
 2993: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2994: 						0, range->start, range->end,
 2995: 						range->blockName);
 2996: 		    if (ret != 0)
 2997: 			accept = 1; /* might still be excluded */
 2998: 		}
 2999: 	    }
 3000: 	    return(accept);
 3001: 	}
 3002:         case XML_REGEXP_STRING:
 3003: 	    printf("TODO: XML_REGEXP_STRING\n");
 3004: 	    return(-1);
 3005:         case XML_REGEXP_ANYCHAR:
 3006:         case XML_REGEXP_ANYSPACE:
 3007:         case XML_REGEXP_NOTSPACE:
 3008:         case XML_REGEXP_INITNAME:
 3009:         case XML_REGEXP_NOTINITNAME:
 3010:         case XML_REGEXP_NAMECHAR:
 3011:         case XML_REGEXP_NOTNAMECHAR:
 3012:         case XML_REGEXP_DECIMAL:
 3013:         case XML_REGEXP_NOTDECIMAL:
 3014:         case XML_REGEXP_REALCHAR:
 3015:         case XML_REGEXP_NOTREALCHAR:
 3016:         case XML_REGEXP_LETTER:
 3017:         case XML_REGEXP_LETTER_UPPERCASE:
 3018:         case XML_REGEXP_LETTER_LOWERCASE:
 3019:         case XML_REGEXP_LETTER_TITLECASE:
 3020:         case XML_REGEXP_LETTER_MODIFIER:
 3021:         case XML_REGEXP_LETTER_OTHERS:
 3022:         case XML_REGEXP_MARK:
 3023:         case XML_REGEXP_MARK_NONSPACING:
 3024:         case XML_REGEXP_MARK_SPACECOMBINING:
 3025:         case XML_REGEXP_MARK_ENCLOSING:
 3026:         case XML_REGEXP_NUMBER:
 3027:         case XML_REGEXP_NUMBER_DECIMAL:
 3028:         case XML_REGEXP_NUMBER_LETTER:
 3029:         case XML_REGEXP_NUMBER_OTHERS:
 3030:         case XML_REGEXP_PUNCT:
 3031:         case XML_REGEXP_PUNCT_CONNECTOR:
 3032:         case XML_REGEXP_PUNCT_DASH:
 3033:         case XML_REGEXP_PUNCT_OPEN:
 3034:         case XML_REGEXP_PUNCT_CLOSE:
 3035:         case XML_REGEXP_PUNCT_INITQUOTE:
 3036:         case XML_REGEXP_PUNCT_FINQUOTE:
 3037:         case XML_REGEXP_PUNCT_OTHERS:
 3038:         case XML_REGEXP_SEPAR:
 3039:         case XML_REGEXP_SEPAR_SPACE:
 3040:         case XML_REGEXP_SEPAR_LINE:
 3041:         case XML_REGEXP_SEPAR_PARA:
 3042:         case XML_REGEXP_SYMBOL:
 3043:         case XML_REGEXP_SYMBOL_MATH:
 3044:         case XML_REGEXP_SYMBOL_CURRENCY:
 3045:         case XML_REGEXP_SYMBOL_MODIFIER:
 3046:         case XML_REGEXP_SYMBOL_OTHERS:
 3047:         case XML_REGEXP_OTHER:
 3048:         case XML_REGEXP_OTHER_CONTROL:
 3049:         case XML_REGEXP_OTHER_FORMAT:
 3050:         case XML_REGEXP_OTHER_PRIVATE:
 3051:         case XML_REGEXP_OTHER_NA:
 3052: 	case XML_REGEXP_BLOCK_NAME:
 3053: 	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
 3054: 		                            (const xmlChar *)atom->valuep);
 3055: 	    if (atom->neg)
 3056: 		ret = !ret;
 3057: 	    break;
 3058:     }
 3059:     return(ret);
 3060: }
 3061: 
 3062: /************************************************************************
 3063:  *									*
 3064:  *	Saving and restoring state of an execution context		*
 3065:  *									*
 3066:  ************************************************************************/
 3067: 
 3068: #ifdef DEBUG_REGEXP_EXEC
 3069: static void
 3070: xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
 3071:     printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
 3072:     if (exec->inputStack != NULL) {
 3073: 	int i;
 3074: 	printf(": ");
 3075: 	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
 3076: 	    printf("%s ", (const char *)
 3077: 	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
 3078:     } else {
 3079: 	printf(": %s", &(exec->inputString[exec->index]));
 3080:     }
 3081:     printf("\n");
 3082: }
 3083: #endif
 3084: 
 3085: static void
 3086: xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
 3087: #ifdef DEBUG_REGEXP_EXEC
 3088:     printf("saving ");
 3089:     exec->transno++;
 3090:     xmlFARegDebugExec(exec);
 3091:     exec->transno--;
 3092: #endif
 3093: #ifdef MAX_PUSH
 3094:     if (exec->nbPush > MAX_PUSH) {
 3095:         return;
 3096:     }
 3097:     exec->nbPush++;
 3098: #endif
 3099: 
 3100:     if (exec->maxRollbacks == 0) {
 3101: 	exec->maxRollbacks = 4;
 3102: 	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
 3103: 		                             sizeof(xmlRegExecRollback));
 3104: 	if (exec->rollbacks == NULL) {
 3105: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3106: 	    exec->maxRollbacks = 0;
 3107: 	    return;
 3108: 	}
 3109: 	memset(exec->rollbacks, 0,
 3110: 	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3111:     } else if (exec->nbRollbacks >= exec->maxRollbacks) {
 3112: 	xmlRegExecRollback *tmp;
 3113: 	int len = exec->maxRollbacks;
 3114: 
 3115: 	exec->maxRollbacks *= 2;
 3116: 	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
 3117: 			exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3118: 	if (tmp == NULL) {
 3119: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3120: 	    exec->maxRollbacks /= 2;
 3121: 	    return;
 3122: 	}
 3123: 	exec->rollbacks = tmp;
 3124: 	tmp = &exec->rollbacks[len];
 3125: 	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
 3126:     }
 3127:     exec->rollbacks[exec->nbRollbacks].state = exec->state;
 3128:     exec->rollbacks[exec->nbRollbacks].index = exec->index;
 3129:     exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
 3130:     if (exec->comp->nbCounters > 0) {
 3131: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3132: 	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
 3133: 		xmlMalloc(exec->comp->nbCounters * sizeof(int));
 3134: 	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3135: 		xmlRegexpErrMemory(NULL, "saving regexp");
 3136: 		exec->status = -5;
 3137: 		return;
 3138: 	    }
 3139: 	}
 3140: 	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
 3141: 	       exec->comp->nbCounters * sizeof(int));
 3142:     }
 3143:     exec->nbRollbacks++;
 3144: }
 3145: 
 3146: static void
 3147: xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
 3148:     if (exec->nbRollbacks <= 0) {
 3149: 	exec->status = -1;
 3150: #ifdef DEBUG_REGEXP_EXEC
 3151: 	printf("rollback failed on empty stack\n");
 3152: #endif
 3153: 	return;
 3154:     }
 3155:     exec->nbRollbacks--;
 3156:     exec->state = exec->rollbacks[exec->nbRollbacks].state;
 3157:     exec->index = exec->rollbacks[exec->nbRollbacks].index;
 3158:     exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
 3159:     if (exec->comp->nbCounters > 0) {
 3160: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3161: 	    fprintf(stderr, "exec save: allocation failed");
 3162: 	    exec->status = -6;
 3163: 	    return;
 3164: 	}
 3165: 	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
 3166: 	       exec->comp->nbCounters * sizeof(int));
 3167:     }
 3168: 
 3169: #ifdef DEBUG_REGEXP_EXEC
 3170:     printf("restored ");
 3171:     xmlFARegDebugExec(exec);
 3172: #endif
 3173: }
 3174: 
 3175: /************************************************************************
 3176:  *									*
 3177:  *	Verifier, running an input against a compiled regexp		*
 3178:  *									*
 3179:  ************************************************************************/
 3180: 
 3181: static int
 3182: xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
 3183:     xmlRegExecCtxt execval;
 3184:     xmlRegExecCtxtPtr exec = &execval;
 3185:     int ret, codepoint = 0, len, deter;
 3186: 
 3187:     exec->inputString = content;
 3188:     exec->index = 0;
 3189:     exec->nbPush = 0;
 3190:     exec->determinist = 1;
 3191:     exec->maxRollbacks = 0;
 3192:     exec->nbRollbacks = 0;
 3193:     exec->rollbacks = NULL;
 3194:     exec->status = 0;
 3195:     exec->comp = comp;
 3196:     exec->state = comp->states[0];
 3197:     exec->transno = 0;
 3198:     exec->transcount = 0;
 3199:     exec->inputStack = NULL;
 3200:     exec->inputStackMax = 0;
 3201:     if (comp->nbCounters > 0) {
 3202: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
 3203: 	if (exec->counts == NULL) {
 3204: 	    xmlRegexpErrMemory(NULL, "running regexp");
 3205: 	    return(-1);
 3206: 	}
 3207:         memset(exec->counts, 0, comp->nbCounters * sizeof(int));
 3208:     } else
 3209: 	exec->counts = NULL;
 3210:     while ((exec->status == 0) && (exec->state != NULL) &&
 3211: 	   ((exec->inputString[exec->index] != 0) ||
 3212: 	    ((exec->state != NULL) &&
 3213: 	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 3214: 	xmlRegTransPtr trans;
 3215: 	xmlRegAtomPtr atom;
 3216: 
 3217: 	/*
 3218: 	 * If end of input on non-terminal state, rollback, however we may
 3219: 	 * still have epsilon like transition for counted transitions
 3220: 	 * on counters, in that case don't break too early.  Additionally,
 3221: 	 * if we are working on a range like "AB{0,2}", where B is not present,
 3222: 	 * we don't want to break.
 3223: 	 */
 3224: 	len = 1;
 3225: 	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
 3226: 	    /*
 3227: 	     * if there is a transition, we must check if
 3228: 	     *  atom allows minOccurs of 0
 3229: 	     */
 3230: 	    if (exec->transno < exec->state->nbTrans) {
 3231: 	        trans = &exec->state->trans[exec->transno];
 3232: 		if (trans->to >=0) {
 3233: 		    atom = trans->atom;
 3234: 		    if (!((atom->min == 0) && (atom->max > 0)))
 3235: 		        goto rollback;
 3236: 		}
 3237: 	    } else
 3238: 	        goto rollback;
 3239: 	}
 3240: 
 3241: 	exec->transcount = 0;
 3242: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 3243: 	    trans = &exec->state->trans[exec->transno];
 3244: 	    if (trans->to < 0)
 3245: 		continue;
 3246: 	    atom = trans->atom;
 3247: 	    ret = 0;
 3248: 	    deter = 1;
 3249: 	    if (trans->count >= 0) {
 3250: 		int count;
 3251: 		xmlRegCounterPtr counter;
 3252: 
 3253: 		if (exec->counts == NULL) {
 3254: 		    exec->status = -1;
 3255: 		    goto error;
 3256: 		}
 3257: 		/*
 3258: 		 * A counted transition.
 3259: 		 */
 3260: 
 3261: 		count = exec->counts[trans->count];
 3262: 		counter = &exec->comp->counters[trans->count];
 3263: #ifdef DEBUG_REGEXP_EXEC
 3264: 		printf("testing count %d: val %d, min %d, max %d\n",
 3265: 		       trans->count, count, counter->min,  counter->max);
 3266: #endif
 3267: 		ret = ((count >= counter->min) && (count <= counter->max));
 3268: 		if ((ret) && (counter->min != counter->max))
 3269: 		    deter = 0;
 3270: 	    } else if (atom == NULL) {
 3271: 		fprintf(stderr, "epsilon transition left at runtime\n");
 3272: 		exec->status = -2;
 3273: 		break;
 3274: 	    } else if (exec->inputString[exec->index] != 0) {
 3275:                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
 3276: 		ret = xmlRegCheckCharacter(atom, codepoint);
 3277: 		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
 3278: 		    xmlRegStatePtr to = comp->states[trans->to];
 3279: 
 3280: 		    /*
 3281: 		     * this is a multiple input sequence
 3282: 		     * If there is a counter associated increment it now.
 3283: 		     * before potentially saving and rollback
 3284: 		     * do not increment if the counter is already over the
 3285: 		     * maximum limit in which case get to next transition
 3286: 		     */
 3287: 		    if (trans->counter >= 0) {
 3288: 			xmlRegCounterPtr counter;
 3289: 
 3290: 			if ((exec->counts == NULL) ||
 3291: 			    (exec->comp == NULL) ||
 3292: 			    (exec->comp->counters == NULL)) {
 3293: 			    exec->status = -1;
 3294: 			    goto error;
 3295: 			}
 3296: 			counter = &exec->comp->counters[trans->counter];
 3297: 			if (exec->counts[trans->counter] >= counter->max)
 3298: 			    continue; /* for loop on transitions */
 3299: 
 3300: #ifdef DEBUG_REGEXP_EXEC
 3301: 			printf("Increasing count %d\n", trans->counter);
 3302: #endif
 3303: 			exec->counts[trans->counter]++;
 3304: 		    }
 3305: 		    if (exec->state->nbTrans > exec->transno + 1) {
 3306: 			xmlFARegExecSave(exec);
 3307: 		    }
 3308: 		    exec->transcount = 1;
 3309: 		    do {
 3310: 			/*
 3311: 			 * Try to progress as much as possible on the input
 3312: 			 */
 3313: 			if (exec->transcount == atom->max) {
 3314: 			    break;
 3315: 			}
 3316: 			exec->index += len;
 3317: 			/*
 3318: 			 * End of input: stop here
 3319: 			 */
 3320: 			if (exec->inputString[exec->index] == 0) {
 3321: 			    exec->index -= len;
 3322: 			    break;
 3323: 			}
 3324: 			if (exec->transcount >= atom->min) {
 3325: 			    int transno = exec->transno;
 3326: 			    xmlRegStatePtr state = exec->state;
 3327: 
 3328: 			    /*
 3329: 			     * The transition is acceptable save it
 3330: 			     */
 3331: 			    exec->transno = -1; /* trick */
 3332: 			    exec->state = to;
 3333: 			    xmlFARegExecSave(exec);
 3334: 			    exec->transno = transno;
 3335: 			    exec->state = state;
 3336: 			}
 3337: 			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
 3338: 				              len);
 3339: 			ret = xmlRegCheckCharacter(atom, codepoint);
 3340: 			exec->transcount++;
 3341: 		    } while (ret == 1);
 3342: 		    if (exec->transcount < atom->min)
 3343: 			ret = 0;
 3344: 
 3345: 		    /*
 3346: 		     * If the last check failed but one transition was found
 3347: 		     * possible, rollback
 3348: 		     */
 3349: 		    if (ret < 0)
 3350: 			ret = 0;
 3351: 		    if (ret == 0) {
 3352: 			goto rollback;
 3353: 		    }
 3354: 		    if (trans->counter >= 0) {
 3355: 			if (exec->counts == NULL) {
 3356: 			    exec->status = -1;
 3357: 			    goto error;
 3358: 			}
 3359: #ifdef DEBUG_REGEXP_EXEC
 3360: 			printf("Decreasing count %d\n", trans->counter);
 3361: #endif
 3362: 			exec->counts[trans->counter]--;
 3363: 		    }
 3364: 		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
 3365: 		    /*
 3366: 		     * we don't match on the codepoint, but minOccurs of 0
 3367: 		     * says that's ok.  Setting len to 0 inhibits stepping
 3368: 		     * over the codepoint.
 3369: 		     */
 3370: 		    exec->transcount = 1;
 3371: 		    len = 0;
 3372: 		    ret = 1;
 3373: 		}
 3374: 	    } else if ((atom->min == 0) && (atom->max > 0)) {
 3375: 	        /* another spot to match when minOccurs is 0 */
 3376: 		exec->transcount = 1;
 3377: 		len = 0;
 3378: 		ret = 1;
 3379: 	    }
 3380: 	    if (ret == 1) {
 3381: 		if ((trans->nd == 1) ||
 3382: 		    ((trans->count >= 0) && (deter == 0) &&
 3383: 		     (exec->state->nbTrans > exec->transno + 1))) {
 3384: #ifdef DEBUG_REGEXP_EXEC
 3385: 		    if (trans->nd == 1)
 3386: 		        printf("Saving on nd transition atom %d for %c at %d\n",
 3387: 			       trans->atom->no, codepoint, exec->index);
 3388: 		    else
 3389: 		        printf("Saving on counted transition count %d for %c at %d\n",
 3390: 			       trans->count, codepoint, exec->index);
 3391: #endif
 3392: 		    xmlFARegExecSave(exec);
 3393: 		}
 3394: 		if (trans->counter >= 0) {
 3395: 		    xmlRegCounterPtr counter;
 3396: 
 3397:                     /* make sure we don't go over the counter maximum value */
 3398: 		    if ((exec->counts == NULL) ||
 3399: 			(exec->comp == NULL) ||
 3400: 			(exec->comp->counters == NULL)) {
 3401: 			exec->status = -1;
 3402: 			goto error;
 3403: 		    }
 3404: 		    counter = &exec->comp->counters[trans->counter];
 3405: 		    if (exec->counts[trans->counter] >= counter->max)
 3406: 			continue; /* for loop on transitions */
 3407: #ifdef DEBUG_REGEXP_EXEC
 3408: 		    printf("Increasing count %d\n", trans->counter);
 3409: #endif
 3410: 		    exec->counts[trans->counter]++;
 3411: 		}
 3412: 		if ((trans->count >= 0) &&
 3413: 		    (trans->count < REGEXP_ALL_COUNTER)) {
 3414: 		    if (exec->counts == NULL) {
 3415: 		        exec->status = -1;
 3416: 			goto error;
 3417: 		    }
 3418: #ifdef DEBUG_REGEXP_EXEC
 3419: 		    printf("resetting count %d on transition\n",
 3420: 		           trans->count);
 3421: #endif
 3422: 		    exec->counts[trans->count] = 0;
 3423: 		}
 3424: #ifdef DEBUG_REGEXP_EXEC
 3425: 		printf("entering state %d\n", trans->to);
 3426: #endif
 3427: 		exec->state = comp->states[trans->to];
 3428: 		exec->transno = 0;
 3429: 		if (trans->atom != NULL) {
 3430: 		    exec->index += len;
 3431: 		}
 3432: 		goto progress;
 3433: 	    } else if (ret < 0) {
 3434: 		exec->status = -4;
 3435: 		break;
 3436: 	    }
 3437: 	}
 3438: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 3439: rollback:
 3440: 	    /*
 3441: 	     * Failed to find a way out
 3442: 	     */
 3443: 	    exec->determinist = 0;
 3444: #ifdef DEBUG_REGEXP_EXEC
 3445: 	    printf("rollback from state %d on %d:%c\n", exec->state->no,
 3446: 	           codepoint,codepoint);
 3447: #endif
 3448: 	    xmlFARegExecRollBack(exec);
 3449: 	}
 3450: progress:
 3451: 	continue;
 3452:     }
 3453: error:
 3454:     if (exec->rollbacks != NULL) {
 3455: 	if (exec->counts != NULL) {
 3456: 	    int i;
 3457: 
 3458: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3459: 		if (exec->rollbacks[i].counts != NULL)
 3460: 		    xmlFree(exec->rollbacks[i].counts);
 3461: 	}
 3462: 	xmlFree(exec->rollbacks);
 3463:     }
 3464:     if (exec->state == NULL)
 3465:         return(-1);
 3466:     if (exec->counts != NULL)
 3467: 	xmlFree(exec->counts);
 3468:     if (exec->status == 0)
 3469: 	return(1);
 3470:     if (exec->status == -1) {
 3471: 	if (exec->nbPush > MAX_PUSH)
 3472: 	    return(-1);
 3473: 	return(0);
 3474:     }
 3475:     return(exec->status);
 3476: }
 3477: 
 3478: /************************************************************************
 3479:  *									*
 3480:  *	Progressive interface to the verifier one atom at a time	*
 3481:  *									*
 3482:  ************************************************************************/
 3483: #ifdef DEBUG_ERR
 3484: static void testerr(xmlRegExecCtxtPtr exec);
 3485: #endif
 3486: 
 3487: /**
 3488:  * xmlRegNewExecCtxt:
 3489:  * @comp: a precompiled regular expression
 3490:  * @callback: a callback function used for handling progresses in the
 3491:  *            automata matching phase
 3492:  * @data: the context data associated to the callback in this context
 3493:  *
 3494:  * Build a context used for progressive evaluation of a regexp.
 3495:  *
 3496:  * Returns the new context
 3497:  */
 3498: xmlRegExecCtxtPtr
 3499: xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
 3500:     xmlRegExecCtxtPtr exec;
 3501: 
 3502:     if (comp == NULL)
 3503: 	return(NULL);
 3504:     if ((comp->compact == NULL) && (comp->states == NULL))
 3505:         return(NULL);
 3506:     exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
 3507:     if (exec == NULL) {
 3508: 	xmlRegexpErrMemory(NULL, "creating execution context");
 3509: 	return(NULL);
 3510:     }
 3511:     memset(exec, 0, sizeof(xmlRegExecCtxt));
 3512:     exec->inputString = NULL;
 3513:     exec->index = 0;
 3514:     exec->determinist = 1;
 3515:     exec->maxRollbacks = 0;
 3516:     exec->nbRollbacks = 0;
 3517:     exec->rollbacks = NULL;
 3518:     exec->status = 0;
 3519:     exec->comp = comp;
 3520:     if (comp->compact == NULL)
 3521: 	exec->state = comp->states[0];
 3522:     exec->transno = 0;
 3523:     exec->transcount = 0;
 3524:     exec->callback = callback;
 3525:     exec->data = data;
 3526:     if (comp->nbCounters > 0) {
 3527:         /*
 3528: 	 * For error handling, exec->counts is allocated twice the size
 3529: 	 * the second half is used to store the data in case of rollback
 3530: 	 */
 3531: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
 3532: 	                                 * 2);
 3533: 	if (exec->counts == NULL) {
 3534: 	    xmlRegexpErrMemory(NULL, "creating execution context");
 3535: 	    xmlFree(exec);
 3536: 	    return(NULL);
 3537: 	}
 3538:         memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
 3539: 	exec->errCounts = &exec->counts[comp->nbCounters];
 3540:     } else {
 3541: 	exec->counts = NULL;
 3542: 	exec->errCounts = NULL;
 3543:     }
 3544:     exec->inputStackMax = 0;
 3545:     exec->inputStackNr = 0;
 3546:     exec->inputStack = NULL;
 3547:     exec->errStateNo = -1;
 3548:     exec->errString = NULL;
 3549:     exec->nbPush = 0;
 3550:     return(exec);
 3551: }
 3552: 
 3553: /**
 3554:  * xmlRegFreeExecCtxt:
 3555:  * @exec: a regular expression evaulation context
 3556:  *
 3557:  * Free the structures associated to a regular expression evaulation context.
 3558:  */
 3559: void
 3560: xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
 3561:     if (exec == NULL)
 3562: 	return;
 3563: 
 3564:     if (exec->rollbacks != NULL) {
 3565: 	if (exec->counts != NULL) {
 3566: 	    int i;
 3567: 
 3568: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3569: 		if (exec->rollbacks[i].counts != NULL)
 3570: 		    xmlFree(exec->rollbacks[i].counts);
 3571: 	}
 3572: 	xmlFree(exec->rollbacks);
 3573:     }
 3574:     if (exec->counts != NULL)
 3575: 	xmlFree(exec->counts);
 3576:     if (exec->inputStack != NULL) {
 3577: 	int i;
 3578: 
 3579: 	for (i = 0;i < exec->inputStackNr;i++) {
 3580: 	    if (exec->inputStack[i].value != NULL)
 3581: 		xmlFree(exec->inputStack[i].value);
 3582: 	}
 3583: 	xmlFree(exec->inputStack);
 3584:     }
 3585:     if (exec->errString != NULL)
 3586:         xmlFree(exec->errString);
 3587:     xmlFree(exec);
 3588: }
 3589: 
 3590: static void
 3591: xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 3592: 	                    void *data) {
 3593: #ifdef DEBUG_PUSH
 3594:     printf("saving value: %d:%s\n", exec->inputStackNr, value);
 3595: #endif
 3596:     if (exec->inputStackMax == 0) {
 3597: 	exec->inputStackMax = 4;
 3598: 	exec->inputStack = (xmlRegInputTokenPtr)
 3599: 	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
 3600: 	if (exec->inputStack == NULL) {
 3601: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3602: 	    exec->inputStackMax = 0;
 3603: 	    return;
 3604: 	}
 3605:     } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
 3606: 	xmlRegInputTokenPtr tmp;
 3607: 
 3608: 	exec->inputStackMax *= 2;
 3609: 	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
 3610: 			exec->inputStackMax * sizeof(xmlRegInputToken));
 3611: 	if (tmp == NULL) {
 3612: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3613: 	    exec->inputStackMax /= 2;
 3614: 	    return;
 3615: 	}
 3616: 	exec->inputStack = tmp;
 3617:     }
 3618:     exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
 3619:     exec->inputStack[exec->inputStackNr].data = data;
 3620:     exec->inputStackNr++;
 3621:     exec->inputStack[exec->inputStackNr].value = NULL;
 3622:     exec->inputStack[exec->inputStackNr].data = NULL;
 3623: }
 3624: 
 3625: /**
 3626:  * xmlRegStrEqualWildcard:
 3627:  * @expStr:  the string to be evaluated
 3628:  * @valStr:  the validation string
 3629:  *
 3630:  * Checks if both strings are equal or have the same content. "*"
 3631:  * can be used as a wildcard in @valStr; "|" is used as a seperator of
 3632:  * substrings in both @expStr and @valStr.
 3633:  *
 3634:  * Returns 1 if the comparison is satisfied and the number of substrings
 3635:  * is equal, 0 otherwise.
 3636:  */
 3637: 
 3638: static int
 3639: xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
 3640:     if (expStr == valStr) return(1);
 3641:     if (expStr == NULL) return(0);
 3642:     if (valStr == NULL) return(0);
 3643:     do {
 3644: 	/*
 3645: 	* Eval if we have a wildcard for the current item.
 3646: 	*/
 3647:         if (*expStr != *valStr) {
 3648: 	    /* if one of them starts with a wildcard make valStr be it */
 3649: 	    if (*valStr == '*') {
 3650: 	        const xmlChar *tmp;
 3651: 
 3652: 		tmp = valStr;
 3653: 		valStr = expStr;
 3654: 		expStr = tmp;
 3655: 	    }
 3656: 	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
 3657: 		do {
 3658: 		    if (*valStr == XML_REG_STRING_SEPARATOR)
 3659: 			break;
 3660: 		    valStr++;
 3661: 		} while (*valStr != 0);
 3662: 		continue;
 3663: 	    } else
 3664: 		return(0);
 3665: 	}
 3666: 	expStr++;
 3667: 	valStr++;
 3668:     } while (*valStr != 0);
 3669:     if (*expStr != 0)
 3670: 	return (0);
 3671:     else
 3672: 	return (1);
 3673: }
 3674: 
 3675: /**
 3676:  * xmlRegCompactPushString:
 3677:  * @exec: a regexp execution context
 3678:  * @comp:  the precompiled exec with a compact table
 3679:  * @value: a string token input
 3680:  * @data: data associated to the token to reuse in callbacks
 3681:  *
 3682:  * Push one input token in the execution context
 3683:  *
 3684:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 3685:  *     a negative value in case of error.
 3686:  */
 3687: static int
 3688: xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
 3689: 	                xmlRegexpPtr comp,
 3690: 	                const xmlChar *value,
 3691: 	                void *data) {
 3692:     int state = exec->index;
 3693:     int i, target;
 3694: 
 3695:     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
 3696: 	return(-1);
 3697: 
 3698:     if (value == NULL) {
 3699: 	/*
 3700: 	 * are we at a final state ?
 3701: 	 */
 3702: 	if (comp->compact[state * (comp->nbstrings + 1)] ==
 3703:             XML_REGEXP_FINAL_STATE)
 3704: 	    return(1);
 3705: 	return(0);
 3706:     }
 3707: 
 3708: #ifdef DEBUG_PUSH
 3709:     printf("value pushed: %s\n", value);
 3710: #endif
 3711: 
 3712:     /*
 3713:      * Examine all outside transitions from current state
 3714:      */
 3715:     for (i = 0;i < comp->nbstrings;i++) {
 3716: 	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 3717: 	if ((target > 0) && (target <= comp->nbstates)) {
 3718: 	    target--; /* to avoid 0 */
 3719: 	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
 3720: 		exec->index = target;
 3721: 		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
 3722: 		    exec->callback(exec->data, value,
 3723: 			  comp->transdata[state * comp->nbstrings + i], data);
 3724: 		}
 3725: #ifdef DEBUG_PUSH
 3726: 		printf("entering state %d\n", target);
 3727: #endif
 3728: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3729: 		    XML_REGEXP_SINK_STATE)
 3730: 		    goto error;
 3731: 
 3732: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3733: 		    XML_REGEXP_FINAL_STATE)
 3734: 		    return(1);
 3735: 		return(0);
 3736: 	    }
 3737: 	}
 3738:     }
 3739:     /*
 3740:      * Failed to find an exit transition out from current state for the
 3741:      * current token
 3742:      */
 3743: #ifdef DEBUG_PUSH
 3744:     printf("failed to find a transition for %s on state %d\n", value, state);
 3745: #endif
 3746: error:
 3747:     if (exec->errString != NULL)
 3748:         xmlFree(exec->errString);
 3749:     exec->errString = xmlStrdup(value);
 3750:     exec->errStateNo = state;
 3751:     exec->status = -1;
 3752: #ifdef DEBUG_ERR
 3753:     testerr(exec);
 3754: #endif
 3755:     return(-1);
 3756: }
 3757: 
 3758: /**
 3759:  * xmlRegExecPushStringInternal:
 3760:  * @exec: a regexp execution context or NULL to indicate the end
 3761:  * @value: a string token input
 3762:  * @data: data associated to the token to reuse in callbacks
 3763:  * @compound: value was assembled from 2 strings
 3764:  *
 3765:  * Push one input token in the execution context
 3766:  *
 3767:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 3768:  *     a negative value in case of error.
 3769:  */
 3770: static int
 3771: xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
 3772: 	                     void *data, int compound) {
 3773:     xmlRegTransPtr trans;
 3774:     xmlRegAtomPtr atom;
 3775:     int ret;
 3776:     int final = 0;
 3777:     int progress = 1;
 3778: 
 3779:     if (exec == NULL)
 3780: 	return(-1);
 3781:     if (exec->comp == NULL)
 3782: 	return(-1);
 3783:     if (exec->status != 0)
 3784: 	return(exec->status);
 3785: 
 3786:     if (exec->comp->compact != NULL)
 3787: 	return(xmlRegCompactPushString(exec, exec->comp, value, data));
 3788: 
 3789:     if (value == NULL) {
 3790:         if (exec->state->type == XML_REGEXP_FINAL_STATE)
 3791: 	    return(1);
 3792: 	final = 1;
 3793:     }
 3794: 
 3795: #ifdef DEBUG_PUSH
 3796:     printf("value pushed: %s\n", value);
 3797: #endif
 3798:     /*
 3799:      * If we have an active rollback stack push the new value there
 3800:      * and get back to where we were left
 3801:      */
 3802:     if ((value != NULL) && (exec->inputStackNr > 0)) {
 3803: 	xmlFARegExecSaveInputString(exec, value, data);
 3804: 	value = exec->inputStack[exec->index].value;
 3805: 	data = exec->inputStack[exec->index].data;
 3806: #ifdef DEBUG_PUSH
 3807: 	printf("value loaded: %s\n", value);
 3808: #endif
 3809:     }
 3810: 
 3811:     while ((exec->status == 0) &&
 3812: 	   ((value != NULL) ||
 3813: 	    ((final == 1) &&
 3814: 	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 3815: 
 3816: 	/*
 3817: 	 * End of input on non-terminal state, rollback, however we may
 3818: 	 * still have epsilon like transition for counted transitions
 3819: 	 * on counters, in that case don't break too early.
 3820: 	 */
 3821: 	if ((value == NULL) && (exec->counts == NULL))
 3822: 	    goto rollback;
 3823: 
 3824: 	exec->transcount = 0;
 3825: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 3826: 	    trans = &exec->state->trans[exec->transno];
 3827: 	    if (trans->to < 0)
 3828: 		continue;
 3829: 	    atom = trans->atom;
 3830: 	    ret = 0;
 3831: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 3832: 		int i;
 3833: 		int count;
 3834: 		xmlRegTransPtr t;
 3835: 		xmlRegCounterPtr counter;
 3836: 
 3837: 		ret = 0;
 3838: 
 3839: #ifdef DEBUG_PUSH
 3840: 		printf("testing all lax %d\n", trans->count);
 3841: #endif
 3842: 		/*
 3843: 		 * Check all counted transitions from the current state
 3844: 		 */
 3845: 		if ((value == NULL) && (final)) {
 3846: 		    ret = 1;
 3847: 		} else if (value != NULL) {
 3848: 		    for (i = 0;i < exec->state->nbTrans;i++) {
 3849: 			t = &exec->state->trans[i];
 3850: 			if ((t->counter < 0) || (t == trans))
 3851: 			    continue;
 3852: 			counter = &exec->comp->counters[t->counter];
 3853: 			count = exec->counts[t->counter];
 3854: 			if ((count < counter->max) &&
 3855: 		            (t->atom != NULL) &&
 3856: 			    (xmlStrEqual(value, t->atom->valuep))) {
 3857: 			    ret = 0;
 3858: 			    break;
 3859: 			}
 3860: 			if ((count >= counter->min) &&
 3861: 			    (count < counter->max) &&
 3862: 			    (t->atom != NULL) &&
 3863: 			    (xmlStrEqual(value, t->atom->valuep))) {
 3864: 			    ret = 1;
 3865: 			    break;
 3866: 			}
 3867: 		    }
 3868: 		}
 3869: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 3870: 		int i;
 3871: 		int count;
 3872: 		xmlRegTransPtr t;
 3873: 		xmlRegCounterPtr counter;
 3874: 
 3875: 		ret = 1;
 3876: 
 3877: #ifdef DEBUG_PUSH
 3878: 		printf("testing all %d\n", trans->count);
 3879: #endif
 3880: 		/*
 3881: 		 * Check all counted transitions from the current state
 3882: 		 */
 3883: 		for (i = 0;i < exec->state->nbTrans;i++) {
 3884:                     t = &exec->state->trans[i];
 3885: 		    if ((t->counter < 0) || (t == trans))
 3886: 			continue;
 3887:                     counter = &exec->comp->counters[t->counter];
 3888: 		    count = exec->counts[t->counter];
 3889: 		    if ((count < counter->min) || (count > counter->max)) {
 3890: 			ret = 0;
 3891: 			break;
 3892: 		    }
 3893: 		}
 3894: 	    } else if (trans->count >= 0) {
 3895: 		int count;
 3896: 		xmlRegCounterPtr counter;
 3897: 
 3898: 		/*
 3899: 		 * A counted transition.
 3900: 		 */
 3901: 
 3902: 		count = exec->counts[trans->count];
 3903: 		counter = &exec->comp->counters[trans->count];
 3904: #ifdef DEBUG_PUSH
 3905: 		printf("testing count %d: val %d, min %d, max %d\n",
 3906: 		       trans->count, count, counter->min,  counter->max);
 3907: #endif
 3908: 		ret = ((count >= counter->min) && (count <= counter->max));
 3909: 	    } else if (atom == NULL) {
 3910: 		fprintf(stderr, "epsilon transition left at runtime\n");
 3911: 		exec->status = -2;
 3912: 		break;
 3913: 	    } else if (value != NULL) {
 3914: 		ret = xmlRegStrEqualWildcard(atom->valuep, value);
 3915: 		if (atom->neg) {
 3916: 		    ret = !ret;
 3917: 		    if (!compound)
 3918: 		        ret = 0;
 3919: 		}
 3920: 		if ((ret == 1) && (trans->counter >= 0)) {
 3921: 		    xmlRegCounterPtr counter;
 3922: 		    int count;
 3923: 
 3924: 		    count = exec->counts[trans->counter];
 3925: 		    counter = &exec->comp->counters[trans->counter];
 3926: 		    if (count >= counter->max)
 3927: 			ret = 0;
 3928: 		}
 3929: 
 3930: 		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
 3931: 		    xmlRegStatePtr to = exec->comp->states[trans->to];
 3932: 
 3933: 		    /*
 3934: 		     * this is a multiple input sequence
 3935: 		     */
 3936: 		    if (exec->state->nbTrans > exec->transno + 1) {
 3937: 			if (exec->inputStackNr <= 0) {
 3938: 			    xmlFARegExecSaveInputString(exec, value, data);
 3939: 			}
 3940: 			xmlFARegExecSave(exec);
 3941: 		    }
 3942: 		    exec->transcount = 1;
 3943: 		    do {
 3944: 			/*
 3945: 			 * Try to progress as much as possible on the input
 3946: 			 */
 3947: 			if (exec->transcount == atom->max) {
 3948: 			    break;
 3949: 			}
 3950: 			exec->index++;
 3951: 			value = exec->inputStack[exec->index].value;
 3952: 			data = exec->inputStack[exec->index].data;
 3953: #ifdef DEBUG_PUSH
 3954: 			printf("value loaded: %s\n", value);
 3955: #endif
 3956: 
 3957: 			/*
 3958: 			 * End of input: stop here
 3959: 			 */
 3960: 			if (value == NULL) {
 3961: 			    exec->index --;
 3962: 			    break;
 3963: 			}
 3964: 			if (exec->transcount >= atom->min) {
 3965: 			    int transno = exec->transno;
 3966: 			    xmlRegStatePtr state = exec->state;
 3967: 
 3968: 			    /*
 3969: 			     * The transition is acceptable save it
 3970: 			     */
 3971: 			    exec->transno = -1; /* trick */
 3972: 			    exec->state = to;
 3973: 			    if (exec->inputStackNr <= 0) {
 3974: 				xmlFARegExecSaveInputString(exec, value, data);
 3975: 			    }
 3976: 			    xmlFARegExecSave(exec);
 3977: 			    exec->transno = transno;
 3978: 			    exec->state = state;
 3979: 			}
 3980: 			ret = xmlStrEqual(value, atom->valuep);
 3981: 			exec->transcount++;
 3982: 		    } while (ret == 1);
 3983: 		    if (exec->transcount < atom->min)
 3984: 			ret = 0;
 3985: 
 3986: 		    /*
 3987: 		     * If the last check failed but one transition was found
 3988: 		     * possible, rollback
 3989: 		     */
 3990: 		    if (ret < 0)
 3991: 			ret = 0;
 3992: 		    if (ret == 0) {
 3993: 			goto rollback;
 3994: 		    }
 3995: 		}
 3996: 	    }
 3997: 	    if (ret == 1) {
 3998: 		if ((exec->callback != NULL) && (atom != NULL) &&
 3999: 			(data != NULL)) {
 4000: 		    exec->callback(exec->data, atom->valuep,
 4001: 			           atom->data, data);
 4002: 		}
 4003: 		if (exec->state->nbTrans > exec->transno + 1) {
 4004: 		    if (exec->inputStackNr <= 0) {
 4005: 			xmlFARegExecSaveInputString(exec, value, data);
 4006: 		    }
 4007: 		    xmlFARegExecSave(exec);
 4008: 		}
 4009: 		if (trans->counter >= 0) {
 4010: #ifdef DEBUG_PUSH
 4011: 		    printf("Increasing count %d\n", trans->counter);
 4012: #endif
 4013: 		    exec->counts[trans->counter]++;
 4014: 		}
 4015: 		if ((trans->count >= 0) &&
 4016: 		    (trans->count < REGEXP_ALL_COUNTER)) {
 4017: #ifdef DEBUG_REGEXP_EXEC
 4018: 		    printf("resetting count %d on transition\n",
 4019: 		           trans->count);
 4020: #endif
 4021: 		    exec->counts[trans->count] = 0;
 4022: 		}
 4023: #ifdef DEBUG_PUSH
 4024: 		printf("entering state %d\n", trans->to);
 4025: #endif
 4026:                 if ((exec->comp->states[trans->to] != NULL) &&
 4027: 		    (exec->comp->states[trans->to]->type ==
 4028: 		     XML_REGEXP_SINK_STATE)) {
 4029: 		    /*
 4030: 		     * entering a sink state, save the current state as error
 4031: 		     * state.
 4032: 		     */
 4033: 		    if (exec->errString != NULL)
 4034: 			xmlFree(exec->errString);
 4035: 		    exec->errString = xmlStrdup(value);
 4036: 		    exec->errState = exec->state;
 4037: 		    memcpy(exec->errCounts, exec->counts,
 4038: 			   exec->comp->nbCounters * sizeof(int));
 4039: 		}
 4040: 		exec->state = exec->comp->states[trans->to];
 4041: 		exec->transno = 0;
 4042: 		if (trans->atom != NULL) {
 4043: 		    if (exec->inputStack != NULL) {
 4044: 			exec->index++;
 4045: 			if (exec->index < exec->inputStackNr) {
 4046: 			    value = exec->inputStack[exec->index].value;
 4047: 			    data = exec->inputStack[exec->index].data;
 4048: #ifdef DEBUG_PUSH
 4049: 			    printf("value loaded: %s\n", value);
 4050: #endif
 4051: 			} else {
 4052: 			    value = NULL;
 4053: 			    data = NULL;
 4054: #ifdef DEBUG_PUSH
 4055: 			    printf("end of input\n");
 4056: #endif
 4057: 			}
 4058: 		    } else {
 4059: 			value = NULL;
 4060: 			data = NULL;
 4061: #ifdef DEBUG_PUSH
 4062: 			printf("end of input\n");
 4063: #endif
 4064: 		    }
 4065: 		}
 4066: 		goto progress;
 4067: 	    } else if (ret < 0) {
 4068: 		exec->status = -4;
 4069: 		break;
 4070: 	    }
 4071: 	}
 4072: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 4073: rollback:
 4074:             /*
 4075: 	     * if we didn't yet rollback on the current input
 4076: 	     * store the current state as the error state.
 4077: 	     */
 4078: 	    if ((progress) && (exec->state != NULL) &&
 4079: 	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
 4080: 	        progress = 0;
 4081: 		if (exec->errString != NULL)
 4082: 		    xmlFree(exec->errString);
 4083: 		exec->errString = xmlStrdup(value);
 4084: 		exec->errState = exec->state;
 4085: 		memcpy(exec->errCounts, exec->counts,
 4086: 		       exec->comp->nbCounters * sizeof(int));
 4087: 	    }
 4088: 
 4089: 	    /*
 4090: 	     * Failed to find a way out
 4091: 	     */
 4092: 	    exec->determinist = 0;
 4093: 	    xmlFARegExecRollBack(exec);
 4094: 	    if (exec->status == 0) {
 4095: 		value = exec->inputStack[exec->index].value;
 4096: 		data = exec->inputStack[exec->index].data;
 4097: #ifdef DEBUG_PUSH
 4098: 		printf("value loaded: %s\n", value);
 4099: #endif
 4100: 	    }
 4101: 	}
 4102: 	continue;
 4103: progress:
 4104:         progress = 1;
 4105: 	continue;
 4106:     }
 4107:     if (exec->status == 0) {
 4108:         return(exec->state->type == XML_REGEXP_FINAL_STATE);
 4109:     }
 4110: #ifdef DEBUG_ERR
 4111:     if (exec->status < 0) {
 4112: 	testerr(exec);
 4113:     }
 4114: #endif
 4115:     return(exec->status);
 4116: }
 4117: 
 4118: /**
 4119:  * xmlRegExecPushString:
 4120:  * @exec: a regexp execution context or NULL to indicate the end
 4121:  * @value: a string token input
 4122:  * @data: data associated to the token to reuse in callbacks
 4123:  *
 4124:  * Push one input token in the execution context
 4125:  *
 4126:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4127:  *     a negative value in case of error.
 4128:  */
 4129: int
 4130: xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4131: 	             void *data) {
 4132:     return(xmlRegExecPushStringInternal(exec, value, data, 0));
 4133: }
 4134: 
 4135: /**
 4136:  * xmlRegExecPushString2:
 4137:  * @exec: a regexp execution context or NULL to indicate the end
 4138:  * @value: the first string token input
 4139:  * @value2: the second string token input
 4140:  * @data: data associated to the token to reuse in callbacks
 4141:  *
 4142:  * Push one input token in the execution context
 4143:  *
 4144:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4145:  *     a negative value in case of error.
 4146:  */
 4147: int
 4148: xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4149:                       const xmlChar *value2, void *data) {
 4150:     xmlChar buf[150];
 4151:     int lenn, lenp, ret;
 4152:     xmlChar *str;
 4153: 
 4154:     if (exec == NULL)
 4155: 	return(-1);
 4156:     if (exec->comp == NULL)
 4157: 	return(-1);
 4158:     if (exec->status != 0)
 4159: 	return(exec->status);
 4160: 
 4161:     if (value2 == NULL)
 4162:         return(xmlRegExecPushString(exec, value, data));
 4163: 
 4164:     lenn = strlen((char *) value2);
 4165:     lenp = strlen((char *) value);
 4166: 
 4167:     if (150 < lenn + lenp + 2) {
 4168: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 4169: 	if (str == NULL) {
 4170: 	    exec->status = -1;
 4171: 	    return(-1);
 4172: 	}
 4173:     } else {
 4174: 	str = buf;
 4175:     }
 4176:     memcpy(&str[0], value, lenp);
 4177:     str[lenp] = XML_REG_STRING_SEPARATOR;
 4178:     memcpy(&str[lenp + 1], value2, lenn);
 4179:     str[lenn + lenp + 1] = 0;
 4180: 
 4181:     if (exec->comp->compact != NULL)
 4182: 	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
 4183:     else
 4184:         ret = xmlRegExecPushStringInternal(exec, str, data, 1);
 4185: 
 4186:     if (str != buf)
 4187:         xmlFree(str);
 4188:     return(ret);
 4189: }
 4190: 
 4191: /**
 4192:  * xmlRegExecGetValues:
 4193:  * @exec: a regexp execution context
 4194:  * @err: error extraction or normal one
 4195:  * @nbval: pointer to the number of accepted values IN/OUT
 4196:  * @nbneg: return number of negative transitions
 4197:  * @values: pointer to the array of acceptable values
 4198:  * @terminal: return value if this was a terminal state
 4199:  *
 4200:  * Extract informations from the regexp execution, internal routine to
 4201:  * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
 4202:  *
 4203:  * Returns: 0 in case of success or -1 in case of error.
 4204:  */
 4205: static int
 4206: xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 4207:                     int *nbval, int *nbneg,
 4208: 		    xmlChar **values, int *terminal) {
 4209:     int maxval;
 4210:     int nb = 0;
 4211: 
 4212:     if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) ||
 4213:         (values == NULL) || (*nbval <= 0))
 4214:         return(-1);
 4215: 
 4216:     maxval = *nbval;
 4217:     *nbval = 0;
 4218:     *nbneg = 0;
 4219:     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
 4220:         xmlRegexpPtr comp;
 4221: 	int target, i, state;
 4222: 
 4223:         comp = exec->comp;
 4224: 
 4225: 	if (err) {
 4226: 	    if (exec->errStateNo == -1) return(-1);
 4227: 	    state = exec->errStateNo;
 4228: 	} else {
 4229: 	    state = exec->index;
 4230: 	}
 4231: 	if (terminal != NULL) {
 4232: 	    if (comp->compact[state * (comp->nbstrings + 1)] ==
 4233: 	        XML_REGEXP_FINAL_STATE)
 4234: 		*terminal = 1;
 4235: 	    else
 4236: 		*terminal = 0;
 4237: 	}
 4238: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4239: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4240: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4241: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
 4242: 		 XML_REGEXP_SINK_STATE)) {
 4243: 	        values[nb++] = comp->stringMap[i];
 4244: 		(*nbval)++;
 4245: 	    }
 4246: 	}
 4247: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4248: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4249: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4250: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
 4251: 		 XML_REGEXP_SINK_STATE)) {
 4252: 	        values[nb++] = comp->stringMap[i];
 4253: 		(*nbneg)++;
 4254: 	    }
 4255: 	}
 4256:     } else {
 4257:         int transno;
 4258: 	xmlRegTransPtr trans;
 4259: 	xmlRegAtomPtr atom;
 4260: 	xmlRegStatePtr state;
 4261: 
 4262: 	if (terminal != NULL) {
 4263: 	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
 4264: 		*terminal = 1;
 4265: 	    else
 4266: 		*terminal = 0;
 4267: 	}
 4268: 
 4269: 	if (err) {
 4270: 	    if (exec->errState == NULL) return(-1);
 4271: 	    state = exec->errState;
 4272: 	} else {
 4273: 	    if (exec->state == NULL) return(-1);
 4274: 	    state = exec->state;
 4275: 	}
 4276: 	for (transno = 0;
 4277: 	     (transno < state->nbTrans) && (nb < maxval);
 4278: 	     transno++) {
 4279: 	    trans = &state->trans[transno];
 4280: 	    if (trans->to < 0)
 4281: 		continue;
 4282: 	    atom = trans->atom;
 4283: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4284: 		continue;
 4285: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4286: 	        /* this should not be reached but ... */
 4287: 	        TODO;
 4288: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4289: 	        /* this should not be reached but ... */
 4290: 	        TODO;
 4291: 	    } else if (trans->counter >= 0) {
 4292: 		xmlRegCounterPtr counter = NULL;
 4293: 		int count;
 4294: 
 4295: 		if (err)
 4296: 		    count = exec->errCounts[trans->counter];
 4297: 		else
 4298: 		    count = exec->counts[trans->counter];
 4299: 		if (exec->comp != NULL)
 4300: 		    counter = &exec->comp->counters[trans->counter];
 4301: 		if ((counter == NULL) || (count < counter->max)) {
 4302: 		    if (atom->neg)
 4303: 			values[nb++] = (xmlChar *) atom->valuep2;
 4304: 		    else
 4305: 			values[nb++] = (xmlChar *) atom->valuep;
 4306: 		    (*nbval)++;
 4307: 		}
 4308: 	    } else {
 4309:                 if ((exec->comp->states[trans->to] != NULL) &&
 4310: 		    (exec->comp->states[trans->to]->type !=
 4311: 		     XML_REGEXP_SINK_STATE)) {
 4312: 		    if (atom->neg)
 4313: 			values[nb++] = (xmlChar *) atom->valuep2;
 4314: 		    else
 4315: 			values[nb++] = (xmlChar *) atom->valuep;
 4316: 		    (*nbval)++;
 4317: 		}
 4318: 	    }
 4319: 	}
 4320: 	for (transno = 0;
 4321: 	     (transno < state->nbTrans) && (nb < maxval);
 4322: 	     transno++) {
 4323: 	    trans = &state->trans[transno];
 4324: 	    if (trans->to < 0)
 4325: 		continue;
 4326: 	    atom = trans->atom;
 4327: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4328: 		continue;
 4329: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4330: 	        continue;
 4331: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4332: 	        continue;
 4333: 	    } else if (trans->counter >= 0) {
 4334: 	        continue;
 4335: 	    } else {
 4336:                 if ((exec->comp->states[trans->to] != NULL) &&
 4337: 		    (exec->comp->states[trans->to]->type ==
 4338: 		     XML_REGEXP_SINK_STATE)) {
 4339: 		    if (atom->neg)
 4340: 			values[nb++] = (xmlChar *) atom->valuep2;
 4341: 		    else
 4342: 			values[nb++] = (xmlChar *) atom->valuep;
 4343: 		    (*nbneg)++;
 4344: 		}
 4345: 	    }
 4346: 	}
 4347:     }
 4348:     return(0);
 4349: }
 4350: 
 4351: /**
 4352:  * xmlRegExecNextValues:
 4353:  * @exec: a regexp execution context
 4354:  * @nbval: pointer to the number of accepted values IN/OUT
 4355:  * @nbneg: return number of negative transitions
 4356:  * @values: pointer to the array of acceptable values
 4357:  * @terminal: return value if this was a terminal state
 4358:  *
 4359:  * Extract informations from the regexp execution,
 4360:  * the parameter @values must point to an array of @nbval string pointers
 4361:  * on return nbval will contain the number of possible strings in that
 4362:  * state and the @values array will be updated with them. The string values
 4363:  * returned will be freed with the @exec context and don't need to be
 4364:  * deallocated.
 4365:  *
 4366:  * Returns: 0 in case of success or -1 in case of error.
 4367:  */
 4368: int
 4369: xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
 4370:                      xmlChar **values, int *terminal) {
 4371:     return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
 4372: }
 4373: 
 4374: /**
 4375:  * xmlRegExecErrInfo:
 4376:  * @exec: a regexp execution context generating an error
 4377:  * @string: return value for the error string
 4378:  * @nbval: pointer to the number of accepted values IN/OUT
 4379:  * @nbneg: return number of negative transitions
 4380:  * @values: pointer to the array of acceptable values
 4381:  * @terminal: return value if this was a terminal state
 4382:  *
 4383:  * Extract error informations from the regexp execution, the parameter
 4384:  * @string will be updated with the value pushed and not accepted,
 4385:  * the parameter @values must point to an array of @nbval string pointers
 4386:  * on return nbval will contain the number of possible strings in that
 4387:  * state and the @values array will be updated with them. The string values
 4388:  * returned will be freed with the @exec context and don't need to be
 4389:  * deallocated.
 4390:  *
 4391:  * Returns: 0 in case of success or -1 in case of error.
 4392:  */
 4393: int
 4394: xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
 4395:                   int *nbval, int *nbneg, xmlChar **values, int *terminal) {
 4396:     if (exec == NULL)
 4397:         return(-1);
 4398:     if (string != NULL) {
 4399:         if (exec->status != 0)
 4400: 	    *string = exec->errString;
 4401: 	else
 4402: 	    *string = NULL;
 4403:     }
 4404:     return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
 4405: }
 4406: 
 4407: #ifdef DEBUG_ERR
 4408: static void testerr(xmlRegExecCtxtPtr exec) {
 4409:     const xmlChar *string;
 4410:     xmlChar *values[5];
 4411:     int nb = 5;
 4412:     int nbneg;
 4413:     int terminal;
 4414:     xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
 4415: }
 4416: #endif
 4417: 
 4418: #if 0
 4419: static int
 4420: xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
 4421:     xmlRegTransPtr trans;
 4422:     xmlRegAtomPtr atom;
 4423:     int ret;
 4424:     int codepoint, len;
 4425: 
 4426:     if (exec == NULL)
 4427: 	return(-1);
 4428:     if (exec->status != 0)
 4429: 	return(exec->status);
 4430: 
 4431:     while ((exec->status == 0) &&
 4432: 	   ((exec->inputString[exec->index] != 0) ||
 4433: 	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
 4434: 
 4435: 	/*
 4436: 	 * End of input on non-terminal state, rollback, however we may
 4437: 	 * still have epsilon like transition for counted transitions
 4438: 	 * on counters, in that case don't break too early.
 4439: 	 */
 4440: 	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
 4441: 	    goto rollback;
 4442: 
 4443: 	exec->transcount = 0;
 4444: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 4445: 	    trans = &exec->state->trans[exec->transno];
 4446: 	    if (trans->to < 0)
 4447: 		continue;
 4448: 	    atom = trans->atom;
 4449: 	    ret = 0;
 4450: 	    if (trans->count >= 0) {
 4451: 		int count;
 4452: 		xmlRegCounterPtr counter;
 4453: 
 4454: 		/*
 4455: 		 * A counted transition.
 4456: 		 */
 4457: 
 4458: 		count = exec->counts[trans->count];
 4459: 		counter = &exec->comp->counters[trans->count];
 4460: #ifdef DEBUG_REGEXP_EXEC
 4461: 		printf("testing count %d: val %d, min %d, max %d\n",
 4462: 		       trans->count, count, counter->min,  counter->max);
 4463: #endif
 4464: 		ret = ((count >= counter->min) && (count <= counter->max));
 4465: 	    } else if (atom == NULL) {
 4466: 		fprintf(stderr, "epsilon transition left at runtime\n");
 4467: 		exec->status = -2;
 4468: 		break;
 4469: 	    } else if (exec->inputString[exec->index] != 0) {
 4470:                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
 4471: 		ret = xmlRegCheckCharacter(atom, codepoint);
 4472: 		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
 4473: 		    xmlRegStatePtr to = exec->comp->states[trans->to];
 4474: 
 4475: 		    /*
 4476: 		     * this is a multiple input sequence
 4477: 		     */
 4478: 		    if (exec->state->nbTrans > exec->transno + 1) {
 4479: 			xmlFARegExecSave(exec);
 4480: 		    }
 4481: 		    exec->transcount = 1;
 4482: 		    do {
 4483: 			/*
 4484: 			 * Try to progress as much as possible on the input
 4485: 			 */
 4486: 			if (exec->transcount == atom->max) {
 4487: 			    break;
 4488: 			}
 4489: 			exec->index += len;
 4490: 			/*
 4491: 			 * End of input: stop here
 4492: 			 */
 4493: 			if (exec->inputString[exec->index] == 0) {
 4494: 			    exec->index -= len;
 4495: 			    break;
 4496: 			}
 4497: 			if (exec->transcount >= atom->min) {
 4498: 			    int transno = exec->transno;
 4499: 			    xmlRegStatePtr state = exec->state;
 4500: 
 4501: 			    /*
 4502: 			     * The transition is acceptable save it
 4503: 			     */
 4504: 			    exec->transno = -1; /* trick */
 4505: 			    exec->state = to;
 4506: 			    xmlFARegExecSave(exec);
 4507: 			    exec->transno = transno;
 4508: 			    exec->state = state;
 4509: 			}
 4510: 			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
 4511: 				              len);
 4512: 			ret = xmlRegCheckCharacter(atom, codepoint);
 4513: 			exec->transcount++;
 4514: 		    } while (ret == 1);
 4515: 		    if (exec->transcount < atom->min)
 4516: 			ret = 0;
 4517: 
 4518: 		    /*
 4519: 		     * If the last check failed but one transition was found
 4520: 		     * possible, rollback
 4521: 		     */
 4522: 		    if (ret < 0)
 4523: 			ret = 0;
 4524: 		    if (ret == 0) {
 4525: 			goto rollback;
 4526: 		    }
 4527: 		}
 4528: 	    }
 4529: 	    if (ret == 1) {
 4530: 		if (exec->state->nbTrans > exec->transno + 1) {
 4531: 		    xmlFARegExecSave(exec);
 4532: 		}
 4533: 		/*
 4534: 		 * restart count for expressions like this ((abc){2})*
 4535: 		 */
 4536: 		if (trans->count >= 0) {
 4537: #ifdef DEBUG_REGEXP_EXEC
 4538: 		    printf("Reset count %d\n", trans->count);
 4539: #endif
 4540: 		    exec->counts[trans->count] = 0;
 4541: 		}
 4542: 		if (trans->counter >= 0) {
 4543: #ifdef DEBUG_REGEXP_EXEC
 4544: 		    printf("Increasing count %d\n", trans->counter);
 4545: #endif
 4546: 		    exec->counts[trans->counter]++;
 4547: 		}
 4548: #ifdef DEBUG_REGEXP_EXEC
 4549: 		printf("entering state %d\n", trans->to);
 4550: #endif
 4551: 		exec->state = exec->comp->states[trans->to];
 4552: 		exec->transno = 0;
 4553: 		if (trans->atom != NULL) {
 4554: 		    exec->index += len;
 4555: 		}
 4556: 		goto progress;
 4557: 	    } else if (ret < 0) {
 4558: 		exec->status = -4;
 4559: 		break;
 4560: 	    }
 4561: 	}
 4562: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 4563: rollback:
 4564: 	    /*
 4565: 	     * Failed to find a way out
 4566: 	     */
 4567: 	    exec->determinist = 0;
 4568: 	    xmlFARegExecRollBack(exec);
 4569: 	}
 4570: progress:
 4571: 	continue;
 4572:     }
 4573: }
 4574: #endif
 4575: /************************************************************************
 4576:  *									*
 4577:  *	Parser for the Schemas Datatype Regular Expressions		*
 4578:  *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
 4579:  *									*
 4580:  ************************************************************************/
 4581: 
 4582: /**
 4583:  * xmlFAIsChar:
 4584:  * @ctxt:  a regexp parser context
 4585:  *
 4586:  * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
 4587:  */
 4588: static int
 4589: xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
 4590:     int cur;
 4591:     int len;
 4592: 
 4593:     cur = CUR_SCHAR(ctxt->cur, len);
 4594:     if ((cur == '.') || (cur == '\\') || (cur == '?') ||
 4595: 	(cur == '*') || (cur == '+') || (cur == '(') ||
 4596: 	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
 4597: 	(cur == 0x5D) || (cur == 0))
 4598: 	return(-1);
 4599:     return(cur);
 4600: }
 4601: 
 4602: /**
 4603:  * xmlFAParseCharProp:
 4604:  * @ctxt:  a regexp parser context
 4605:  *
 4606:  * [27]   charProp   ::=   IsCategory | IsBlock
 4607:  * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
 4608:  *                       Separators | Symbols | Others
 4609:  * [29]   Letters   ::=   'L' [ultmo]?
 4610:  * [30]   Marks   ::=   'M' [nce]?
 4611:  * [31]   Numbers   ::=   'N' [dlo]?
 4612:  * [32]   Punctuation   ::=   'P' [cdseifo]?
 4613:  * [33]   Separators   ::=   'Z' [slp]?
 4614:  * [34]   Symbols   ::=   'S' [mcko]?
 4615:  * [35]   Others   ::=   'C' [cfon]?
 4616:  * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
 4617:  */
 4618: static void
 4619: xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
 4620:     int cur;
 4621:     xmlRegAtomType type = (xmlRegAtomType) 0;
 4622:     xmlChar *blockName = NULL;
 4623: 
 4624:     cur = CUR;
 4625:     if (cur == 'L') {
 4626: 	NEXT;
 4627: 	cur = CUR;
 4628: 	if (cur == 'u') {
 4629: 	    NEXT;
 4630: 	    type = XML_REGEXP_LETTER_UPPERCASE;
 4631: 	} else if (cur == 'l') {
 4632: 	    NEXT;
 4633: 	    type = XML_REGEXP_LETTER_LOWERCASE;
 4634: 	} else if (cur == 't') {
 4635: 	    NEXT;
 4636: 	    type = XML_REGEXP_LETTER_TITLECASE;
 4637: 	} else if (cur == 'm') {
 4638: 	    NEXT;
 4639: 	    type = XML_REGEXP_LETTER_MODIFIER;
 4640: 	} else if (cur == 'o') {
 4641: 	    NEXT;
 4642: 	    type = XML_REGEXP_LETTER_OTHERS;
 4643: 	} else {
 4644: 	    type = XML_REGEXP_LETTER;
 4645: 	}
 4646:     } else if (cur == 'M') {
 4647: 	NEXT;
 4648: 	cur = CUR;
 4649: 	if (cur == 'n') {
 4650: 	    NEXT;
 4651: 	    /* nonspacing */
 4652: 	    type = XML_REGEXP_MARK_NONSPACING;
 4653: 	} else if (cur == 'c') {
 4654: 	    NEXT;
 4655: 	    /* spacing combining */
 4656: 	    type = XML_REGEXP_MARK_SPACECOMBINING;
 4657: 	} else if (cur == 'e') {
 4658: 	    NEXT;
 4659: 	    /* enclosing */
 4660: 	    type = XML_REGEXP_MARK_ENCLOSING;
 4661: 	} else {
 4662: 	    /* all marks */
 4663: 	    type = XML_REGEXP_MARK;
 4664: 	}
 4665:     } else if (cur == 'N') {
 4666: 	NEXT;
 4667: 	cur = CUR;
 4668: 	if (cur == 'd') {
 4669: 	    NEXT;
 4670: 	    /* digital */
 4671: 	    type = XML_REGEXP_NUMBER_DECIMAL;
 4672: 	} else if (cur == 'l') {
 4673: 	    NEXT;
 4674: 	    /* letter */
 4675: 	    type = XML_REGEXP_NUMBER_LETTER;
 4676: 	} else if (cur == 'o') {
 4677: 	    NEXT;
 4678: 	    /* other */
 4679: 	    type = XML_REGEXP_NUMBER_OTHERS;
 4680: 	} else {
 4681: 	    /* all numbers */
 4682: 	    type = XML_REGEXP_NUMBER;
 4683: 	}
 4684:     } else if (cur == 'P') {
 4685: 	NEXT;
 4686: 	cur = CUR;
 4687: 	if (cur == 'c') {
 4688: 	    NEXT;
 4689: 	    /* connector */
 4690: 	    type = XML_REGEXP_PUNCT_CONNECTOR;
 4691: 	} else if (cur == 'd') {
 4692: 	    NEXT;
 4693: 	    /* dash */
 4694: 	    type = XML_REGEXP_PUNCT_DASH;
 4695: 	} else if (cur == 's') {
 4696: 	    NEXT;
 4697: 	    /* open */
 4698: 	    type = XML_REGEXP_PUNCT_OPEN;
 4699: 	} else if (cur == 'e') {
 4700: 	    NEXT;
 4701: 	    /* close */
 4702: 	    type = XML_REGEXP_PUNCT_CLOSE;
 4703: 	} else if (cur == 'i') {
 4704: 	    NEXT;
 4705: 	    /* initial quote */
 4706: 	    type = XML_REGEXP_PUNCT_INITQUOTE;
 4707: 	} else if (cur == 'f') {
 4708: 	    NEXT;
 4709: 	    /* final quote */
 4710: 	    type = XML_REGEXP_PUNCT_FINQUOTE;
 4711: 	} else if (cur == 'o') {
 4712: 	    NEXT;
 4713: 	    /* other */
 4714: 	    type = XML_REGEXP_PUNCT_OTHERS;
 4715: 	} else {
 4716: 	    /* all punctuation */
 4717: 	    type = XML_REGEXP_PUNCT;
 4718: 	}
 4719:     } else if (cur == 'Z') {
 4720: 	NEXT;
 4721: 	cur = CUR;
 4722: 	if (cur == 's') {
 4723: 	    NEXT;
 4724: 	    /* space */
 4725: 	    type = XML_REGEXP_SEPAR_SPACE;
 4726: 	} else if (cur == 'l') {
 4727: 	    NEXT;
 4728: 	    /* line */
 4729: 	    type = XML_REGEXP_SEPAR_LINE;
 4730: 	} else if (cur == 'p') {
 4731: 	    NEXT;
 4732: 	    /* paragraph */
 4733: 	    type = XML_REGEXP_SEPAR_PARA;
 4734: 	} else {
 4735: 	    /* all separators */
 4736: 	    type = XML_REGEXP_SEPAR;
 4737: 	}
 4738:     } else if (cur == 'S') {
 4739: 	NEXT;
 4740: 	cur = CUR;
 4741: 	if (cur == 'm') {
 4742: 	    NEXT;
 4743: 	    type = XML_REGEXP_SYMBOL_MATH;
 4744: 	    /* math */
 4745: 	} else if (cur == 'c') {
 4746: 	    NEXT;
 4747: 	    type = XML_REGEXP_SYMBOL_CURRENCY;
 4748: 	    /* currency */
 4749: 	} else if (cur == 'k') {
 4750: 	    NEXT;
 4751: 	    type = XML_REGEXP_SYMBOL_MODIFIER;
 4752: 	    /* modifiers */
 4753: 	} else if (cur == 'o') {
 4754: 	    NEXT;
 4755: 	    type = XML_REGEXP_SYMBOL_OTHERS;
 4756: 	    /* other */
 4757: 	} else {
 4758: 	    /* all symbols */
 4759: 	    type = XML_REGEXP_SYMBOL;
 4760: 	}
 4761:     } else if (cur == 'C') {
 4762: 	NEXT;
 4763: 	cur = CUR;
 4764: 	if (cur == 'c') {
 4765: 	    NEXT;
 4766: 	    /* control */
 4767: 	    type = XML_REGEXP_OTHER_CONTROL;
 4768: 	} else if (cur == 'f') {
 4769: 	    NEXT;
 4770: 	    /* format */
 4771: 	    type = XML_REGEXP_OTHER_FORMAT;
 4772: 	} else if (cur == 'o') {
 4773: 	    NEXT;
 4774: 	    /* private use */
 4775: 	    type = XML_REGEXP_OTHER_PRIVATE;
 4776: 	} else if (cur == 'n') {
 4777: 	    NEXT;
 4778: 	    /* not assigned */
 4779: 	    type = XML_REGEXP_OTHER_NA;
 4780: 	} else {
 4781: 	    /* all others */
 4782: 	    type = XML_REGEXP_OTHER;
 4783: 	}
 4784:     } else if (cur == 'I') {
 4785: 	const xmlChar *start;
 4786: 	NEXT;
 4787: 	cur = CUR;
 4788: 	if (cur != 's') {
 4789: 	    ERROR("IsXXXX expected");
 4790: 	    return;
 4791: 	}
 4792: 	NEXT;
 4793: 	start = ctxt->cur;
 4794: 	cur = CUR;
 4795: 	if (((cur >= 'a') && (cur <= 'z')) ||
 4796: 	    ((cur >= 'A') && (cur <= 'Z')) ||
 4797: 	    ((cur >= '0') && (cur <= '9')) ||
 4798: 	    (cur == 0x2D)) {
 4799: 	    NEXT;
 4800: 	    cur = CUR;
 4801: 	    while (((cur >= 'a') && (cur <= 'z')) ||
 4802: 		((cur >= 'A') && (cur <= 'Z')) ||
 4803: 		((cur >= '0') && (cur <= '9')) ||
 4804: 		(cur == 0x2D)) {
 4805: 		NEXT;
 4806: 		cur = CUR;
 4807: 	    }
 4808: 	}
 4809: 	type = XML_REGEXP_BLOCK_NAME;
 4810: 	blockName = xmlStrndup(start, ctxt->cur - start);
 4811:     } else {
 4812: 	ERROR("Unknown char property");
 4813: 	return;
 4814:     }
 4815:     if (ctxt->atom == NULL) {
 4816: 	ctxt->atom = xmlRegNewAtom(ctxt, type);
 4817: 	if (ctxt->atom != NULL)
 4818: 	    ctxt->atom->valuep = blockName;
 4819:     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4820:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4821: 		           type, 0, 0, blockName);
 4822:     }
 4823: }
 4824: 
 4825: /**
 4826:  * xmlFAParseCharClassEsc:
 4827:  * @ctxt:  a regexp parser context
 4828:  *
 4829:  * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
 4830:  * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
 4831:  * [25] catEsc   ::=   '\p{' charProp '}'
 4832:  * [26] complEsc ::=   '\P{' charProp '}'
 4833:  * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
 4834:  */
 4835: static void
 4836: xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 4837:     int cur;
 4838: 
 4839:     if (CUR == '.') {
 4840: 	if (ctxt->atom == NULL) {
 4841: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
 4842: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4843: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4844: 			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
 4845: 	}
 4846: 	NEXT;
 4847: 	return;
 4848:     }
 4849:     if (CUR != '\\') {
 4850: 	ERROR("Escaped sequence: expecting \\");
 4851: 	return;
 4852:     }
 4853:     NEXT;
 4854:     cur = CUR;
 4855:     if (cur == 'p') {
 4856: 	NEXT;
 4857: 	if (CUR != '{') {
 4858: 	    ERROR("Expecting '{'");
 4859: 	    return;
 4860: 	}
 4861: 	NEXT;
 4862: 	xmlFAParseCharProp(ctxt);
 4863: 	if (CUR != '}') {
 4864: 	    ERROR("Expecting '}'");
 4865: 	    return;
 4866: 	}
 4867: 	NEXT;
 4868:     } else if (cur == 'P') {
 4869: 	NEXT;
 4870: 	if (CUR != '{') {
 4871: 	    ERROR("Expecting '{'");
 4872: 	    return;
 4873: 	}
 4874: 	NEXT;
 4875: 	xmlFAParseCharProp(ctxt);
 4876: 	ctxt->atom->neg = 1;
 4877: 	if (CUR != '}') {
 4878: 	    ERROR("Expecting '}'");
 4879: 	    return;
 4880: 	}
 4881: 	NEXT;
 4882:     } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
 4883: 	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
 4884: 	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
 4885: 	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
 4886: 	(cur == 0x5E)) {
 4887: 	if (ctxt->atom == NULL) {
 4888: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 4889: 	    if (ctxt->atom != NULL) {
 4890: 	        switch (cur) {
 4891: 		    case 'n':
 4892: 		        ctxt->atom->codepoint = '\n';
 4893: 			break;
 4894: 		    case 'r':
 4895: 		        ctxt->atom->codepoint = '\r';
 4896: 			break;
 4897: 		    case 't':
 4898: 		        ctxt->atom->codepoint = '\t';
 4899: 			break;
 4900: 		    default:
 4901: 			ctxt->atom->codepoint = cur;
 4902: 		}
 4903: 	    }
 4904: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4905:             switch (cur) {
 4906:                 case 'n':
 4907:                     cur = '\n';
 4908:                     break;
 4909:                 case 'r':
 4910:                     cur = '\r';
 4911:                     break;
 4912:                 case 't':
 4913:                     cur = '\t';
 4914:                     break;
 4915:             }
 4916: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4917: 			       XML_REGEXP_CHARVAL, cur, cur, NULL);
 4918: 	}
 4919: 	NEXT;
 4920:     } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
 4921: 	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
 4922: 	(cur == 'w') || (cur == 'W')) {
 4923: 	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
 4924: 
 4925: 	switch (cur) {
 4926: 	    case 's':
 4927: 		type = XML_REGEXP_ANYSPACE;
 4928: 		break;
 4929: 	    case 'S':
 4930: 		type = XML_REGEXP_NOTSPACE;
 4931: 		break;
 4932: 	    case 'i':
 4933: 		type = XML_REGEXP_INITNAME;
 4934: 		break;
 4935: 	    case 'I':
 4936: 		type = XML_REGEXP_NOTINITNAME;
 4937: 		break;
 4938: 	    case 'c':
 4939: 		type = XML_REGEXP_NAMECHAR;
 4940: 		break;
 4941: 	    case 'C':
 4942: 		type = XML_REGEXP_NOTNAMECHAR;
 4943: 		break;
 4944: 	    case 'd':
 4945: 		type = XML_REGEXP_DECIMAL;
 4946: 		break;
 4947: 	    case 'D':
 4948: 		type = XML_REGEXP_NOTDECIMAL;
 4949: 		break;
 4950: 	    case 'w':
 4951: 		type = XML_REGEXP_REALCHAR;
 4952: 		break;
 4953: 	    case 'W':
 4954: 		type = XML_REGEXP_NOTREALCHAR;
 4955: 		break;
 4956: 	}
 4957: 	NEXT;
 4958: 	if (ctxt->atom == NULL) {
 4959: 	    ctxt->atom = xmlRegNewAtom(ctxt, type);
 4960: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4961: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4962: 			       type, 0, 0, NULL);
 4963: 	}
 4964:     } else {
 4965: 	ERROR("Wrong escape sequence, misuse of character '\\'");
 4966:     }
 4967: }
 4968: 
 4969: /**
 4970:  * xmlFAParseCharRange:
 4971:  * @ctxt:  a regexp parser context
 4972:  *
 4973:  * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash
 4974:  * [18]   seRange   ::=   charOrEsc '-' charOrEsc
 4975:  * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
 4976:  * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
 4977:  * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
 4978:  */
 4979: static void
 4980: xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 4981:     int cur, len;
 4982:     int start = -1;
 4983:     int end = -1;
 4984: 
 4985:     if (CUR == '\0') {
 4986:         ERROR("Expecting ']'");
 4987: 	return;
 4988:     }
 4989: 
 4990:     cur = CUR;
 4991:     if (cur == '\\') {
 4992: 	NEXT;
 4993: 	cur = CUR;
 4994: 	switch (cur) {
 4995: 	    case 'n': start = 0xA; break;
 4996: 	    case 'r': start = 0xD; break;
 4997: 	    case 't': start = 0x9; break;
 4998: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 4999: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 5000: 	    case '[': case ']':
 5001: 		start = cur; break;
 5002: 	    default:
 5003: 		ERROR("Invalid escape value");
 5004: 		return;
 5005: 	}
 5006: 	end = start;
 5007:         len = 1;
 5008:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 5009:         end = start = CUR_SCHAR(ctxt->cur, len);
 5010:     } else {
 5011: 	ERROR("Expecting a char range");
 5012: 	return;
 5013:     }
 5014:     /*
 5015:      * Since we are "inside" a range, we can assume ctxt->cur is past
 5016:      * the start of ctxt->string, and PREV should be safe
 5017:      */
 5018:     if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
 5019: 	NEXTL(len);
 5020: 	return;
 5021:     }
 5022:     NEXTL(len);
 5023:     cur = CUR;
 5024:     if ((cur != '-') || (NXT(1) == ']')) {
 5025:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5026: 		              XML_REGEXP_CHARVAL, start, end, NULL);
 5027: 	return;
 5028:     }
 5029:     NEXT;
 5030:     cur = CUR;
 5031:     if (cur == '\\') {
 5032: 	NEXT;
 5033: 	cur = CUR;
 5034: 	switch (cur) {
 5035: 	    case 'n': end = 0xA; break;
 5036: 	    case 'r': end = 0xD; break;
 5037: 	    case 't': end = 0x9; break;
 5038: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 5039: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 5040: 	    case '[': case ']':
 5041: 		end = cur; break;
 5042: 	    default:
 5043: 		ERROR("Invalid escape value");
 5044: 		return;
 5045: 	}
 5046:         len = 1;
 5047:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 5048:         end = CUR_SCHAR(ctxt->cur, len);
 5049:     } else {
 5050: 	ERROR("Expecting the end of a char range");
 5051: 	return;
 5052:     }
 5053:     NEXTL(len);
 5054:     /* TODO check that the values are acceptable character ranges for XML */
 5055:     if (end < start) {
 5056: 	ERROR("End of range is before start of range");
 5057:     } else {
 5058:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5059: 		           XML_REGEXP_CHARVAL, start, end, NULL);
 5060:     }
 5061:     return;
 5062: }
 5063: 
 5064: /**
 5065:  * xmlFAParsePosCharGroup:
 5066:  * @ctxt:  a regexp parser context
 5067:  *
 5068:  * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
 5069:  */
 5070: static void
 5071: xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
 5072:     do {
 5073: 	if (CUR == '\\') {
 5074: 	    xmlFAParseCharClassEsc(ctxt);
 5075: 	} else {
 5076: 	    xmlFAParseCharRange(ctxt);
 5077: 	}
 5078:     } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
 5079:              (CUR != 0) && (ctxt->error == 0));
 5080: }
 5081: 
 5082: /**
 5083:  * xmlFAParseCharGroup:
 5084:  * @ctxt:  a regexp parser context
 5085:  *
 5086:  * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
 5087:  * [15]   negCharGroup ::= '^' posCharGroup
 5088:  * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
 5089:  * [12]   charClassExpr ::= '[' charGroup ']'
 5090:  */
 5091: static void
 5092: xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
 5093:     int n = ctxt->neg;
 5094:     while ((CUR != ']') && (ctxt->error == 0)) {
 5095: 	if (CUR == '^') {
 5096: 	    int neg = ctxt->neg;
 5097: 
 5098: 	    NEXT;
 5099: 	    ctxt->neg = !ctxt->neg;
 5100: 	    xmlFAParsePosCharGroup(ctxt);
 5101: 	    ctxt->neg = neg;
 5102: 	} else if ((CUR == '-') && (NXT(1) == '[')) {
 5103: 	    int neg = ctxt->neg;
 5104: 	    ctxt->neg = 2;
 5105: 	    NEXT;	/* eat the '-' */
 5106: 	    NEXT;	/* eat the '[' */
 5107: 	    xmlFAParseCharGroup(ctxt);
 5108: 	    if (CUR == ']') {
 5109: 		NEXT;
 5110: 	    } else {
 5111: 		ERROR("charClassExpr: ']' expected");
 5112: 		break;
 5113: 	    }
 5114: 	    ctxt->neg = neg;
 5115: 	    break;
 5116: 	} else if (CUR != ']') {
 5117: 	    xmlFAParsePosCharGroup(ctxt);
 5118: 	}
 5119:     }
 5120:     ctxt->neg = n;
 5121: }
 5122: 
 5123: /**
 5124:  * xmlFAParseCharClass:
 5125:  * @ctxt:  a regexp parser context
 5126:  *
 5127:  * [11]   charClass   ::=     charClassEsc | charClassExpr
 5128:  * [12]   charClassExpr   ::=   '[' charGroup ']'
 5129:  */
 5130: static void
 5131: xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
 5132:     if (CUR == '[') {
 5133: 	NEXT;
 5134: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
 5135: 	if (ctxt->atom == NULL)
 5136: 	    return;
 5137: 	xmlFAParseCharGroup(ctxt);
 5138: 	if (CUR == ']') {
 5139: 	    NEXT;
 5140: 	} else {
 5141: 	    ERROR("xmlFAParseCharClass: ']' expected");
 5142: 	}
 5143:     } else {
 5144: 	xmlFAParseCharClassEsc(ctxt);
 5145:     }
 5146: }
 5147: 
 5148: /**
 5149:  * xmlFAParseQuantExact:
 5150:  * @ctxt:  a regexp parser context
 5151:  *
 5152:  * [8]   QuantExact   ::=   [0-9]+
 5153:  *
 5154:  * Returns 0 if success or -1 in case of error
 5155:  */
 5156: static int
 5157: xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
 5158:     int ret = 0;
 5159:     int ok = 0;
 5160: 
 5161:     while ((CUR >= '0') && (CUR <= '9')) {
 5162: 	ret = ret * 10 + (CUR - '0');
 5163: 	ok = 1;
 5164: 	NEXT;
 5165:     }
 5166:     if (ok != 1) {
 5167: 	return(-1);
 5168:     }
 5169:     return(ret);
 5170: }
 5171: 
 5172: /**
 5173:  * xmlFAParseQuantifier:
 5174:  * @ctxt:  a regexp parser context
 5175:  *
 5176:  * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
 5177:  * [5]   quantity   ::=   quantRange | quantMin | QuantExact
 5178:  * [6]   quantRange   ::=   QuantExact ',' QuantExact
 5179:  * [7]   quantMin   ::=   QuantExact ','
 5180:  * [8]   QuantExact   ::=   [0-9]+
 5181:  */
 5182: static int
 5183: xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
 5184:     int cur;
 5185: 
 5186:     cur = CUR;
 5187:     if ((cur == '?') || (cur == '*') || (cur == '+')) {
 5188: 	if (ctxt->atom != NULL) {
 5189: 	    if (cur == '?')
 5190: 		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
 5191: 	    else if (cur == '*')
 5192: 		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
 5193: 	    else if (cur == '+')
 5194: 		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
 5195: 	}
 5196: 	NEXT;
 5197: 	return(1);
 5198:     }
 5199:     if (cur == '{') {
 5200: 	int min = 0, max = 0;
 5201: 
 5202: 	NEXT;
 5203: 	cur = xmlFAParseQuantExact(ctxt);
 5204: 	if (cur >= 0)
 5205: 	    min = cur;
 5206: 	if (CUR == ',') {
 5207: 	    NEXT;
 5208: 	    if (CUR == '}')
 5209: 	        max = INT_MAX;
 5210: 	    else {
 5211: 	        cur = xmlFAParseQuantExact(ctxt);
 5212: 	        if (cur >= 0)
 5213: 		    max = cur;
 5214: 		else {
 5215: 		    ERROR("Improper quantifier");
 5216: 		}
 5217: 	    }
 5218: 	}
 5219: 	if (CUR == '}') {
 5220: 	    NEXT;
 5221: 	} else {
 5222: 	    ERROR("Unterminated quantifier");
 5223: 	}
 5224: 	if (max == 0)
 5225: 	    max = min;
 5226: 	if (ctxt->atom != NULL) {
 5227: 	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
 5228: 	    ctxt->atom->min = min;
 5229: 	    ctxt->atom->max = max;
 5230: 	}
 5231: 	return(1);
 5232:     }
 5233:     return(0);
 5234: }
 5235: 
 5236: /**
 5237:  * xmlFAParseAtom:
 5238:  * @ctxt:  a regexp parser context
 5239:  *
 5240:  * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
 5241:  */
 5242: static int
 5243: xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
 5244:     int codepoint, len;
 5245: 
 5246:     codepoint = xmlFAIsChar(ctxt);
 5247:     if (codepoint > 0) {
 5248: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 5249: 	if (ctxt->atom == NULL)
 5250: 	    return(-1);
 5251: 	codepoint = CUR_SCHAR(ctxt->cur, len);
 5252: 	ctxt->atom->codepoint = codepoint;
 5253: 	NEXTL(len);
 5254: 	return(1);
 5255:     } else if (CUR == '|') {
 5256: 	return(0);
 5257:     } else if (CUR == 0) {
 5258: 	return(0);
 5259:     } else if (CUR == ')') {
 5260: 	return(0);
 5261:     } else if (CUR == '(') {
 5262: 	xmlRegStatePtr start, oldend, start0;
 5263: 
 5264: 	NEXT;
 5265: 	/*
 5266: 	 * this extra Epsilon transition is needed if we count with 0 allowed
 5267: 	 * unfortunately this can't be known at that point
 5268: 	 */
 5269: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5270: 	start0 = ctxt->state;
 5271: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5272: 	start = ctxt->state;
 5273: 	oldend = ctxt->end;
 5274: 	ctxt->end = NULL;
 5275: 	ctxt->atom = NULL;
 5276: 	xmlFAParseRegExp(ctxt, 0);
 5277: 	if (CUR == ')') {
 5278: 	    NEXT;
 5279: 	} else {
 5280: 	    ERROR("xmlFAParseAtom: expecting ')'");
 5281: 	}
 5282: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
 5283: 	if (ctxt->atom == NULL)
 5284: 	    return(-1);
 5285: 	ctxt->atom->start = start;
 5286: 	ctxt->atom->start0 = start0;
 5287: 	ctxt->atom->stop = ctxt->state;
 5288: 	ctxt->end = oldend;
 5289: 	return(1);
 5290:     } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
 5291: 	xmlFAParseCharClass(ctxt);
 5292: 	return(1);
 5293:     }
 5294:     return(0);
 5295: }
 5296: 
 5297: /**
 5298:  * xmlFAParsePiece:
 5299:  * @ctxt:  a regexp parser context
 5300:  *
 5301:  * [3]   piece   ::=   atom quantifier?
 5302:  */
 5303: static int
 5304: xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
 5305:     int ret;
 5306: 
 5307:     ctxt->atom = NULL;
 5308:     ret = xmlFAParseAtom(ctxt);
 5309:     if (ret == 0)
 5310: 	return(0);
 5311:     if (ctxt->atom == NULL) {
 5312: 	ERROR("internal: no atom generated");
 5313:     }
 5314:     xmlFAParseQuantifier(ctxt);
 5315:     return(1);
 5316: }
 5317: 
 5318: /**
 5319:  * xmlFAParseBranch:
 5320:  * @ctxt:  a regexp parser context
 5321:  * @to: optional target to the end of the branch
 5322:  *
 5323:  * @to is used to optimize by removing duplicate path in automata
 5324:  * in expressions like (a|b)(c|d)
 5325:  *
 5326:  * [2]   branch   ::=   piece*
 5327:  */
 5328: static int
 5329: xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
 5330:     xmlRegStatePtr previous;
 5331:     int ret;
 5332: 
 5333:     previous = ctxt->state;
 5334:     ret = xmlFAParsePiece(ctxt);
 5335:     if (ret != 0) {
 5336: 	if (xmlFAGenerateTransitions(ctxt, previous,
 5337: 	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5338: 	    return(-1);
 5339: 	previous = ctxt->state;
 5340: 	ctxt->atom = NULL;
 5341:     }
 5342:     while ((ret != 0) && (ctxt->error == 0)) {
 5343: 	ret = xmlFAParsePiece(ctxt);
 5344: 	if (ret != 0) {
 5345: 	    if (xmlFAGenerateTransitions(ctxt, previous,
 5346: 	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5347: 		    return(-1);
 5348: 	    previous = ctxt->state;
 5349: 	    ctxt->atom = NULL;
 5350: 	}
 5351:     }
 5352:     return(0);
 5353: }
 5354: 
 5355: /**
 5356:  * xmlFAParseRegExp:
 5357:  * @ctxt:  a regexp parser context
 5358:  * @top:  is this the top-level expression ?
 5359:  *
 5360:  * [1]   regExp   ::=     branch  ( '|' branch )*
 5361:  */
 5362: static void
 5363: xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
 5364:     xmlRegStatePtr start, end;
 5365: 
 5366:     /* if not top start should have been generated by an epsilon trans */
 5367:     start = ctxt->state;
 5368:     ctxt->end = NULL;
 5369:     xmlFAParseBranch(ctxt, NULL);
 5370:     if (top) {
 5371: #ifdef DEBUG_REGEXP_GRAPH
 5372: 	printf("State %d is final\n", ctxt->state->no);
 5373: #endif
 5374: 	ctxt->state->type = XML_REGEXP_FINAL_STATE;
 5375:     }
 5376:     if (CUR != '|') {
 5377: 	ctxt->end = ctxt->state;
 5378: 	return;
 5379:     }
 5380:     end = ctxt->state;
 5381:     while ((CUR == '|') && (ctxt->error == 0)) {
 5382: 	NEXT;
 5383: 	if (CUR == 0) {
 5384: 	    ERROR("expecting a branch after |")
 5385: 	    return;
 5386: 	}
 5387: 	ctxt->state = start;
 5388: 	ctxt->end = NULL;
 5389: 	xmlFAParseBranch(ctxt, end);
 5390:     }
 5391:     if (!top) {
 5392: 	ctxt->state = end;
 5393: 	ctxt->end = end;
 5394:     }
 5395: }
 5396: 
 5397: /************************************************************************
 5398:  *									*
 5399:  *			The basic API					*
 5400:  *									*
 5401:  ************************************************************************/
 5402: 
 5403: /**
 5404:  * xmlRegexpPrint:
 5405:  * @output: the file for the output debug
 5406:  * @regexp: the compiled regexp
 5407:  *
 5408:  * Print the content of the compiled regular expression
 5409:  */
 5410: void
 5411: xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
 5412:     int i;
 5413: 
 5414:     if (output == NULL)
 5415:         return;
 5416:     fprintf(output, " regexp: ");
 5417:     if (regexp == NULL) {
 5418: 	fprintf(output, "NULL\n");
 5419: 	return;
 5420:     }
 5421:     fprintf(output, "'%s' ", regexp->string);
 5422:     fprintf(output, "\n");
 5423:     fprintf(output, "%d atoms:\n", regexp->nbAtoms);
 5424:     for (i = 0;i < regexp->nbAtoms; i++) {
 5425: 	fprintf(output, " %02d ", i);
 5426: 	xmlRegPrintAtom(output, regexp->atoms[i]);
 5427:     }
 5428:     fprintf(output, "%d states:", regexp->nbStates);
 5429:     fprintf(output, "\n");
 5430:     for (i = 0;i < regexp->nbStates; i++) {
 5431: 	xmlRegPrintState(output, regexp->states[i]);
 5432:     }
 5433:     fprintf(output, "%d counters:\n", regexp->nbCounters);
 5434:     for (i = 0;i < regexp->nbCounters; i++) {
 5435: 	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
 5436: 		                                regexp->counters[i].max);
 5437:     }
 5438: }
 5439: 
 5440: /**
 5441:  * xmlRegexpCompile:
 5442:  * @regexp:  a regular expression string
 5443:  *
 5444:  * Parses a regular expression conforming to XML Schemas Part 2 Datatype
 5445:  * Appendix F and builds an automata suitable for testing strings against
 5446:  * that regular expression
 5447:  *
 5448:  * Returns the compiled expression or NULL in case of error
 5449:  */
 5450: xmlRegexpPtr
 5451: xmlRegexpCompile(const xmlChar *regexp) {
 5452:     xmlRegexpPtr ret;
 5453:     xmlRegParserCtxtPtr ctxt;
 5454: 
 5455:     ctxt = xmlRegNewParserCtxt(regexp);
 5456:     if (ctxt == NULL)
 5457: 	return(NULL);
 5458: 
 5459:     /* initialize the parser */
 5460:     ctxt->end = NULL;
 5461:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5462:     xmlRegStatePush(ctxt, ctxt->start);
 5463: 
 5464:     /* parse the expression building an automata */
 5465:     xmlFAParseRegExp(ctxt, 1);
 5466:     if (CUR != 0) {
 5467: 	ERROR("xmlFAParseRegExp: extra characters");
 5468:     }
 5469:     if (ctxt->error != 0) {
 5470: 	xmlRegFreeParserCtxt(ctxt);
 5471: 	return(NULL);
 5472:     }
 5473:     ctxt->end = ctxt->state;
 5474:     ctxt->start->type = XML_REGEXP_START_STATE;
 5475:     ctxt->end->type = XML_REGEXP_FINAL_STATE;
 5476: 
 5477:     /* remove the Epsilon except for counted transitions */
 5478:     xmlFAEliminateEpsilonTransitions(ctxt);
 5479: 
 5480: 
 5481:     if (ctxt->error != 0) {
 5482: 	xmlRegFreeParserCtxt(ctxt);
 5483: 	return(NULL);
 5484:     }
 5485:     ret = xmlRegEpxFromParse(ctxt);
 5486:     xmlRegFreeParserCtxt(ctxt);
 5487:     return(ret);
 5488: }
 5489: 
 5490: /**
 5491:  * xmlRegexpExec:
 5492:  * @comp:  the compiled regular expression
 5493:  * @content:  the value to check against the regular expression
 5494:  *
 5495:  * Check if the regular expression generates the value
 5496:  *
 5497:  * Returns 1 if it matches, 0 if not and a negative value in case of error
 5498:  */
 5499: int
 5500: xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
 5501:     if ((comp == NULL) || (content == NULL))
 5502: 	return(-1);
 5503:     return(xmlFARegExec(comp, content));
 5504: }
 5505: 
 5506: /**
 5507:  * xmlRegexpIsDeterminist:
 5508:  * @comp:  the compiled regular expression
 5509:  *
 5510:  * Check if the regular expression is determinist
 5511:  *
 5512:  * Returns 1 if it yes, 0 if not and a negative value in case of error
 5513:  */
 5514: int
 5515: xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
 5516:     xmlAutomataPtr am;
 5517:     int ret;
 5518: 
 5519:     if (comp == NULL)
 5520: 	return(-1);
 5521:     if (comp->determinist != -1)
 5522: 	return(comp->determinist);
 5523: 
 5524:     am = xmlNewAutomata();
 5525:     if (am->states != NULL) {
 5526: 	int i;
 5527: 
 5528: 	for (i = 0;i < am->nbStates;i++)
 5529: 	    xmlRegFreeState(am->states[i]);
 5530: 	xmlFree(am->states);
 5531:     }
 5532:     am->nbAtoms = comp->nbAtoms;
 5533:     am->atoms = comp->atoms;
 5534:     am->nbStates = comp->nbStates;
 5535:     am->states = comp->states;
 5536:     am->determinist = -1;
 5537:     am->flags = comp->flags;
 5538:     ret = xmlFAComputesDeterminism(am);
 5539:     am->atoms = NULL;
 5540:     am->states = NULL;
 5541:     xmlFreeAutomata(am);
 5542:     comp->determinist = ret;
 5543:     return(ret);
 5544: }
 5545: 
 5546: /**
 5547:  * xmlRegFreeRegexp:
 5548:  * @regexp:  the regexp
 5549:  *
 5550:  * Free a regexp
 5551:  */
 5552: void
 5553: xmlRegFreeRegexp(xmlRegexpPtr regexp) {
 5554:     int i;
 5555:     if (regexp == NULL)
 5556: 	return;
 5557: 
 5558:     if (regexp->string != NULL)
 5559: 	xmlFree(regexp->string);
 5560:     if (regexp->states != NULL) {
 5561: 	for (i = 0;i < regexp->nbStates;i++)
 5562: 	    xmlRegFreeState(regexp->states[i]);
 5563: 	xmlFree(regexp->states);
 5564:     }
 5565:     if (regexp->atoms != NULL) {
 5566: 	for (i = 0;i < regexp->nbAtoms;i++)
 5567: 	    xmlRegFreeAtom(regexp->atoms[i]);
 5568: 	xmlFree(regexp->atoms);
 5569:     }
 5570:     if (regexp->counters != NULL)
 5571: 	xmlFree(regexp->counters);
 5572:     if (regexp->compact != NULL)
 5573: 	xmlFree(regexp->compact);
 5574:     if (regexp->transdata != NULL)
 5575: 	xmlFree(regexp->transdata);
 5576:     if (regexp->stringMap != NULL) {
 5577: 	for (i = 0; i < regexp->nbstrings;i++)
 5578: 	    xmlFree(regexp->stringMap[i]);
 5579: 	xmlFree(regexp->stringMap);
 5580:     }
 5581: 
 5582:     xmlFree(regexp);
 5583: }
 5584: 
 5585: #ifdef LIBXML_AUTOMATA_ENABLED
 5586: /************************************************************************
 5587:  *									*
 5588:  *			The Automata interface				*
 5589:  *									*
 5590:  ************************************************************************/
 5591: 
 5592: /**
 5593:  * xmlNewAutomata:
 5594:  *
 5595:  * Create a new automata
 5596:  *
 5597:  * Returns the new object or NULL in case of failure
 5598:  */
 5599: xmlAutomataPtr
 5600: xmlNewAutomata(void) {
 5601:     xmlAutomataPtr ctxt;
 5602: 
 5603:     ctxt = xmlRegNewParserCtxt(NULL);
 5604:     if (ctxt == NULL)
 5605: 	return(NULL);
 5606: 
 5607:     /* initialize the parser */
 5608:     ctxt->end = NULL;
 5609:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5610:     if (ctxt->start == NULL) {
 5611: 	xmlFreeAutomata(ctxt);
 5612: 	return(NULL);
 5613:     }
 5614:     ctxt->start->type = XML_REGEXP_START_STATE;
 5615:     if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
 5616:         xmlRegFreeState(ctxt->start);
 5617: 	xmlFreeAutomata(ctxt);
 5618: 	return(NULL);
 5619:     }
 5620:     ctxt->flags = 0;
 5621: 
 5622:     return(ctxt);
 5623: }
 5624: 
 5625: /**
 5626:  * xmlFreeAutomata:
 5627:  * @am: an automata
 5628:  *
 5629:  * Free an automata
 5630:  */
 5631: void
 5632: xmlFreeAutomata(xmlAutomataPtr am) {
 5633:     if (am == NULL)
 5634: 	return;
 5635:     xmlRegFreeParserCtxt(am);
 5636: }
 5637: 
 5638: /**
 5639:  * xmlAutomataSetFlags:
 5640:  * @am: an automata
 5641:  * @flags:  a set of internal flags
 5642:  *
 5643:  * Set some flags on the automata
 5644:  */
 5645: void
 5646: xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
 5647:     if (am == NULL)
 5648: 	return;
 5649:     am->flags |= flags;
 5650: }
 5651: 
 5652: /**
 5653:  * xmlAutomataGetInitState:
 5654:  * @am: an automata
 5655:  *
 5656:  * Initial state lookup
 5657:  *
 5658:  * Returns the initial state of the automata
 5659:  */
 5660: xmlAutomataStatePtr
 5661: xmlAutomataGetInitState(xmlAutomataPtr am) {
 5662:     if (am == NULL)
 5663: 	return(NULL);
 5664:     return(am->start);
 5665: }
 5666: 
 5667: /**
 5668:  * xmlAutomataSetFinalState:
 5669:  * @am: an automata
 5670:  * @state: a state in this automata
 5671:  *
 5672:  * Makes that state a final state
 5673:  *
 5674:  * Returns 0 or -1 in case of error
 5675:  */
 5676: int
 5677: xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
 5678:     if ((am == NULL) || (state == NULL))
 5679: 	return(-1);
 5680:     state->type = XML_REGEXP_FINAL_STATE;
 5681:     return(0);
 5682: }
 5683: 
 5684: /**
 5685:  * xmlAutomataNewTransition:
 5686:  * @am: an automata
 5687:  * @from: the starting point of the transition
 5688:  * @to: the target point of the transition or NULL
 5689:  * @token: the input string associated to that transition
 5690:  * @data: data passed to the callback function if the transition is activated
 5691:  *
 5692:  * If @to is NULL, this creates first a new target state in the automata
 5693:  * and then adds a transition from the @from state to the target state
 5694:  * activated by the value of @token
 5695:  *
 5696:  * Returns the target state or NULL in case of error
 5697:  */
 5698: xmlAutomataStatePtr
 5699: xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5700: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5701: 			 void *data) {
 5702:     xmlRegAtomPtr atom;
 5703: 
 5704:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5705: 	return(NULL);
 5706:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5707:     if (atom == NULL)
 5708:         return(NULL);
 5709:     atom->data = data;
 5710:     if (atom == NULL)
 5711: 	return(NULL);
 5712:     atom->valuep = xmlStrdup(token);
 5713: 
 5714:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5715:         xmlRegFreeAtom(atom);
 5716: 	return(NULL);
 5717:     }
 5718:     if (to == NULL)
 5719: 	return(am->state);
 5720:     return(to);
 5721: }
 5722: 
 5723: /**
 5724:  * xmlAutomataNewTransition2:
 5725:  * @am: an automata
 5726:  * @from: the starting point of the transition
 5727:  * @to: the target point of the transition or NULL
 5728:  * @token: the first input string associated to that transition
 5729:  * @token2: the second input string associated to that transition
 5730:  * @data: data passed to the callback function if the transition is activated
 5731:  *
 5732:  * If @to is NULL, this creates first a new target state in the automata
 5733:  * and then adds a transition from the @from state to the target state
 5734:  * activated by the value of @token
 5735:  *
 5736:  * Returns the target state or NULL in case of error
 5737:  */
 5738: xmlAutomataStatePtr
 5739: xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5740: 			  xmlAutomataStatePtr to, const xmlChar *token,
 5741: 			  const xmlChar *token2, void *data) {
 5742:     xmlRegAtomPtr atom;
 5743: 
 5744:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5745: 	return(NULL);
 5746:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5747:     if (atom == NULL)
 5748: 	return(NULL);
 5749:     atom->data = data;
 5750:     if ((token2 == NULL) || (*token2 == 0)) {
 5751: 	atom->valuep = xmlStrdup(token);
 5752:     } else {
 5753: 	int lenn, lenp;
 5754: 	xmlChar *str;
 5755: 
 5756: 	lenn = strlen((char *) token2);
 5757: 	lenp = strlen((char *) token);
 5758: 
 5759: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5760: 	if (str == NULL) {
 5761: 	    xmlRegFreeAtom(atom);
 5762: 	    return(NULL);
 5763: 	}
 5764: 	memcpy(&str[0], token, lenp);
 5765: 	str[lenp] = '|';
 5766: 	memcpy(&str[lenp + 1], token2, lenn);
 5767: 	str[lenn + lenp + 1] = 0;
 5768: 
 5769: 	atom->valuep = str;
 5770:     }
 5771: 
 5772:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5773:         xmlRegFreeAtom(atom);
 5774: 	return(NULL);
 5775:     }
 5776:     if (to == NULL)
 5777: 	return(am->state);
 5778:     return(to);
 5779: }
 5780: 
 5781: /**
 5782:  * xmlAutomataNewNegTrans:
 5783:  * @am: an automata
 5784:  * @from: the starting point of the transition
 5785:  * @to: the target point of the transition or NULL
 5786:  * @token: the first input string associated to that transition
 5787:  * @token2: the second input string associated to that transition
 5788:  * @data: data passed to the callback function if the transition is activated
 5789:  *
 5790:  * If @to is NULL, this creates first a new target state in the automata
 5791:  * and then adds a transition from the @from state to the target state
 5792:  * activated by any value except (@token,@token2)
 5793:  * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
 5794:  # the semantic of XSD ##other
 5795:  *
 5796:  * Returns the target state or NULL in case of error
 5797:  */
 5798: xmlAutomataStatePtr
 5799: xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5800: 		       xmlAutomataStatePtr to, const xmlChar *token,
 5801: 		       const xmlChar *token2, void *data) {
 5802:     xmlRegAtomPtr atom;
 5803:     xmlChar err_msg[200];
 5804: 
 5805:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5806: 	return(NULL);
 5807:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5808:     if (atom == NULL)
 5809: 	return(NULL);
 5810:     atom->data = data;
 5811:     atom->neg = 1;
 5812:     if ((token2 == NULL) || (*token2 == 0)) {
 5813: 	atom->valuep = xmlStrdup(token);
 5814:     } else {
 5815: 	int lenn, lenp;
 5816: 	xmlChar *str;
 5817: 
 5818: 	lenn = strlen((char *) token2);
 5819: 	lenp = strlen((char *) token);
 5820: 
 5821: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5822: 	if (str == NULL) {
 5823: 	    xmlRegFreeAtom(atom);
 5824: 	    return(NULL);
 5825: 	}
 5826: 	memcpy(&str[0], token, lenp);
 5827: 	str[lenp] = '|';
 5828: 	memcpy(&str[lenp + 1], token2, lenn);
 5829: 	str[lenn + lenp + 1] = 0;
 5830: 
 5831: 	atom->valuep = str;
 5832:     }
 5833:     snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
 5834:     err_msg[199] = 0;
 5835:     atom->valuep2 = xmlStrdup(err_msg);
 5836: 
 5837:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5838:         xmlRegFreeAtom(atom);
 5839: 	return(NULL);
 5840:     }
 5841:     am->negs++;
 5842:     if (to == NULL)
 5843: 	return(am->state);
 5844:     return(to);
 5845: }
 5846: 
 5847: /**
 5848:  * xmlAutomataNewCountTrans2:
 5849:  * @am: an automata
 5850:  * @from: the starting point of the transition
 5851:  * @to: the target point of the transition or NULL
 5852:  * @token: the input string associated to that transition
 5853:  * @token2: the second input string associated to that transition
 5854:  * @min:  the minimum successive occurences of token
 5855:  * @max:  the maximum successive occurences of token
 5856:  * @data:  data associated to the transition
 5857:  *
 5858:  * If @to is NULL, this creates first a new target state in the automata
 5859:  * and then adds a transition from the @from state to the target state
 5860:  * activated by a succession of input of value @token and @token2 and
 5861:  * whose number is between @min and @max
 5862:  *
 5863:  * Returns the target state or NULL in case of error
 5864:  */
 5865: xmlAutomataStatePtr
 5866: xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5867: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5868: 			 const xmlChar *token2,
 5869: 			 int min, int max, void *data) {
 5870:     xmlRegAtomPtr atom;
 5871:     int counter;
 5872: 
 5873:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5874: 	return(NULL);
 5875:     if (min < 0)
 5876: 	return(NULL);
 5877:     if ((max < min) || (max < 1))
 5878: 	return(NULL);
 5879:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5880:     if (atom == NULL)
 5881: 	return(NULL);
 5882:     if ((token2 == NULL) || (*token2 == 0)) {
 5883: 	atom->valuep = xmlStrdup(token);
 5884:     } else {
 5885: 	int lenn, lenp;
 5886: 	xmlChar *str;
 5887: 
 5888: 	lenn = strlen((char *) token2);
 5889: 	lenp = strlen((char *) token);
 5890: 
 5891: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5892: 	if (str == NULL) {
 5893: 	    xmlRegFreeAtom(atom);
 5894: 	    return(NULL);
 5895: 	}
 5896: 	memcpy(&str[0], token, lenp);
 5897: 	str[lenp] = '|';
 5898: 	memcpy(&str[lenp + 1], token2, lenn);
 5899: 	str[lenn + lenp + 1] = 0;
 5900: 
 5901: 	atom->valuep = str;
 5902:     }
 5903:     atom->data = data;
 5904:     if (min == 0)
 5905: 	atom->min = 1;
 5906:     else
 5907: 	atom->min = min;
 5908:     atom->max = max;
 5909: 
 5910:     /*
 5911:      * associate a counter to the transition.
 5912:      */
 5913:     counter = xmlRegGetCounter(am);
 5914:     am->counters[counter].min = min;
 5915:     am->counters[counter].max = max;
 5916: 
 5917:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5918:     if (to == NULL) {
 5919:         to = xmlRegNewState(am);
 5920: 	xmlRegStatePush(am, to);
 5921:     }
 5922:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5923:     xmlRegAtomPush(am, atom);
 5924:     am->state = to;
 5925: 
 5926:     if (to == NULL)
 5927: 	to = am->state;
 5928:     if (to == NULL)
 5929: 	return(NULL);
 5930:     if (min == 0)
 5931: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5932:     return(to);
 5933: }
 5934: 
 5935: /**
 5936:  * xmlAutomataNewCountTrans:
 5937:  * @am: an automata
 5938:  * @from: the starting point of the transition
 5939:  * @to: the target point of the transition or NULL
 5940:  * @token: the input string associated to that transition
 5941:  * @min:  the minimum successive occurences of token
 5942:  * @max:  the maximum successive occurences of token
 5943:  * @data:  data associated to the transition
 5944:  *
 5945:  * If @to is NULL, this creates first a new target state in the automata
 5946:  * and then adds a transition from the @from state to the target state
 5947:  * activated by a succession of input of value @token and whose number
 5948:  * is between @min and @max
 5949:  *
 5950:  * Returns the target state or NULL in case of error
 5951:  */
 5952: xmlAutomataStatePtr
 5953: xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5954: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5955: 			 int min, int max, void *data) {
 5956:     xmlRegAtomPtr atom;
 5957:     int counter;
 5958: 
 5959:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5960: 	return(NULL);
 5961:     if (min < 0)
 5962: 	return(NULL);
 5963:     if ((max < min) || (max < 1))
 5964: 	return(NULL);
 5965:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5966:     if (atom == NULL)
 5967: 	return(NULL);
 5968:     atom->valuep = xmlStrdup(token);
 5969:     atom->data = data;
 5970:     if (min == 0)
 5971: 	atom->min = 1;
 5972:     else
 5973: 	atom->min = min;
 5974:     atom->max = max;
 5975: 
 5976:     /*
 5977:      * associate a counter to the transition.
 5978:      */
 5979:     counter = xmlRegGetCounter(am);
 5980:     am->counters[counter].min = min;
 5981:     am->counters[counter].max = max;
 5982: 
 5983:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5984:     if (to == NULL) {
 5985:         to = xmlRegNewState(am);
 5986: 	xmlRegStatePush(am, to);
 5987:     }
 5988:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5989:     xmlRegAtomPush(am, atom);
 5990:     am->state = to;
 5991: 
 5992:     if (to == NULL)
 5993: 	to = am->state;
 5994:     if (to == NULL)
 5995: 	return(NULL);
 5996:     if (min == 0)
 5997: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5998:     return(to);
 5999: }
 6000: 
 6001: /**
 6002:  * xmlAutomataNewOnceTrans2:
 6003:  * @am: an automata
 6004:  * @from: the starting point of the transition
 6005:  * @to: the target point of the transition or NULL
 6006:  * @token: the input string associated to that transition
 6007:  * @token2: the second input string associated to that transition
 6008:  * @min:  the minimum successive occurences of token
 6009:  * @max:  the maximum successive occurences of token
 6010:  * @data:  data associated to the transition
 6011:  *
 6012:  * If @to is NULL, this creates first a new target state in the automata
 6013:  * and then adds a transition from the @from state to the target state
 6014:  * activated by a succession of input of value @token and @token2 and whose
 6015:  * number is between @min and @max, moreover that transition can only be
 6016:  * crossed once.
 6017:  *
 6018:  * Returns the target state or NULL in case of error
 6019:  */
 6020: xmlAutomataStatePtr
 6021: xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6022: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6023: 			 const xmlChar *token2,
 6024: 			 int min, int max, void *data) {
 6025:     xmlRegAtomPtr atom;
 6026:     int counter;
 6027: 
 6028:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6029: 	return(NULL);
 6030:     if (min < 1)
 6031: 	return(NULL);
 6032:     if ((max < min) || (max < 1))
 6033: 	return(NULL);
 6034:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6035:     if (atom == NULL)
 6036: 	return(NULL);
 6037:     if ((token2 == NULL) || (*token2 == 0)) {
 6038: 	atom->valuep = xmlStrdup(token);
 6039:     } else {
 6040: 	int lenn, lenp;
 6041: 	xmlChar *str;
 6042: 
 6043: 	lenn = strlen((char *) token2);
 6044: 	lenp = strlen((char *) token);
 6045: 
 6046: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 6047: 	if (str == NULL) {
 6048: 	    xmlRegFreeAtom(atom);
 6049: 	    return(NULL);
 6050: 	}
 6051: 	memcpy(&str[0], token, lenp);
 6052: 	str[lenp] = '|';
 6053: 	memcpy(&str[lenp + 1], token2, lenn);
 6054: 	str[lenn + lenp + 1] = 0;
 6055: 
 6056: 	atom->valuep = str;
 6057:     }
 6058:     atom->data = data;
 6059:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6060:     atom->min = min;
 6061:     atom->max = max;
 6062:     /*
 6063:      * associate a counter to the transition.
 6064:      */
 6065:     counter = xmlRegGetCounter(am);
 6066:     am->counters[counter].min = 1;
 6067:     am->counters[counter].max = 1;
 6068: 
 6069:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6070:     if (to == NULL) {
 6071: 	to = xmlRegNewState(am);
 6072: 	xmlRegStatePush(am, to);
 6073:     }
 6074:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6075:     xmlRegAtomPush(am, atom);
 6076:     am->state = to;
 6077:     return(to);
 6078: }
 6079: 
 6080: 
 6081: 
 6082: /**
 6083:  * xmlAutomataNewOnceTrans:
 6084:  * @am: an automata
 6085:  * @from: the starting point of the transition
 6086:  * @to: the target point of the transition or NULL
 6087:  * @token: the input string associated to that transition
 6088:  * @min:  the minimum successive occurences of token
 6089:  * @max:  the maximum successive occurences of token
 6090:  * @data:  data associated to the transition
 6091:  *
 6092:  * If @to is NULL, this creates first a new target state in the automata
 6093:  * and then adds a transition from the @from state to the target state
 6094:  * activated by a succession of input of value @token and whose number
 6095:  * is between @min and @max, moreover that transition can only be crossed
 6096:  * once.
 6097:  *
 6098:  * Returns the target state or NULL in case of error
 6099:  */
 6100: xmlAutomataStatePtr
 6101: xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6102: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6103: 			 int min, int max, void *data) {
 6104:     xmlRegAtomPtr atom;
 6105:     int counter;
 6106: 
 6107:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6108: 	return(NULL);
 6109:     if (min < 1)
 6110: 	return(NULL);
 6111:     if ((max < min) || (max < 1))
 6112: 	return(NULL);
 6113:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6114:     if (atom == NULL)
 6115: 	return(NULL);
 6116:     atom->valuep = xmlStrdup(token);
 6117:     atom->data = data;
 6118:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6119:     atom->min = min;
 6120:     atom->max = max;
 6121:     /*
 6122:      * associate a counter to the transition.
 6123:      */
 6124:     counter = xmlRegGetCounter(am);
 6125:     am->counters[counter].min = 1;
 6126:     am->counters[counter].max = 1;
 6127: 
 6128:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6129:     if (to == NULL) {
 6130: 	to = xmlRegNewState(am);
 6131: 	xmlRegStatePush(am, to);
 6132:     }
 6133:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6134:     xmlRegAtomPush(am, atom);
 6135:     am->state = to;
 6136:     return(to);
 6137: }
 6138: 
 6139: /**
 6140:  * xmlAutomataNewState:
 6141:  * @am: an automata
 6142:  *
 6143:  * Create a new disconnected state in the automata
 6144:  *
 6145:  * Returns the new state or NULL in case of error
 6146:  */
 6147: xmlAutomataStatePtr
 6148: xmlAutomataNewState(xmlAutomataPtr am) {
 6149:     xmlAutomataStatePtr to;
 6150: 
 6151:     if (am == NULL)
 6152: 	return(NULL);
 6153:     to = xmlRegNewState(am);
 6154:     xmlRegStatePush(am, to);
 6155:     return(to);
 6156: }
 6157: 
 6158: /**
 6159:  * xmlAutomataNewEpsilon:
 6160:  * @am: an automata
 6161:  * @from: the starting point of the transition
 6162:  * @to: the target point of the transition or NULL
 6163:  *
 6164:  * If @to is NULL, this creates first a new target state in the automata
 6165:  * and then adds an epsilon transition from the @from state to the
 6166:  * target state
 6167:  *
 6168:  * Returns the target state or NULL in case of error
 6169:  */
 6170: xmlAutomataStatePtr
 6171: xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6172: 		      xmlAutomataStatePtr to) {
 6173:     if ((am == NULL) || (from == NULL))
 6174: 	return(NULL);
 6175:     xmlFAGenerateEpsilonTransition(am, from, to);
 6176:     if (to == NULL)
 6177: 	return(am->state);
 6178:     return(to);
 6179: }
 6180: 
 6181: /**
 6182:  * xmlAutomataNewAllTrans:
 6183:  * @am: an automata
 6184:  * @from: the starting point of the transition
 6185:  * @to: the target point of the transition or NULL
 6186:  * @lax: allow to transition if not all all transitions have been activated
 6187:  *
 6188:  * If @to is NULL, this creates first a new target state in the automata
 6189:  * and then adds a an ALL transition from the @from state to the
 6190:  * target state. That transition is an epsilon transition allowed only when
 6191:  * all transitions from the @from node have been activated.
 6192:  *
 6193:  * Returns the target state or NULL in case of error
 6194:  */
 6195: xmlAutomataStatePtr
 6196: xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6197: 		       xmlAutomataStatePtr to, int lax) {
 6198:     if ((am == NULL) || (from == NULL))
 6199: 	return(NULL);
 6200:     xmlFAGenerateAllTransition(am, from, to, lax);
 6201:     if (to == NULL)
 6202: 	return(am->state);
 6203:     return(to);
 6204: }
 6205: 
 6206: /**
 6207:  * xmlAutomataNewCounter:
 6208:  * @am: an automata
 6209:  * @min:  the minimal value on the counter
 6210:  * @max:  the maximal value on the counter
 6211:  *
 6212:  * Create a new counter
 6213:  *
 6214:  * Returns the counter number or -1 in case of error
 6215:  */
 6216: int
 6217: xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
 6218:     int ret;
 6219: 
 6220:     if (am == NULL)
 6221: 	return(-1);
 6222: 
 6223:     ret = xmlRegGetCounter(am);
 6224:     if (ret < 0)
 6225: 	return(-1);
 6226:     am->counters[ret].min = min;
 6227:     am->counters[ret].max = max;
 6228:     return(ret);
 6229: }
 6230: 
 6231: /**
 6232:  * xmlAutomataNewCountedTrans:
 6233:  * @am: an automata
 6234:  * @from: the starting point of the transition
 6235:  * @to: the target point of the transition or NULL
 6236:  * @counter: the counter associated to that transition
 6237:  *
 6238:  * If @to is NULL, this creates first a new target state in the automata
 6239:  * and then adds an epsilon transition from the @from state to the target state
 6240:  * which will increment the counter provided
 6241:  *
 6242:  * Returns the target state or NULL in case of error
 6243:  */
 6244: xmlAutomataStatePtr
 6245: xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6246: 		xmlAutomataStatePtr to, int counter) {
 6247:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6248: 	return(NULL);
 6249:     xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
 6250:     if (to == NULL)
 6251: 	return(am->state);
 6252:     return(to);
 6253: }
 6254: 
 6255: /**
 6256:  * xmlAutomataNewCounterTrans:
 6257:  * @am: an automata
 6258:  * @from: the starting point of the transition
 6259:  * @to: the target point of the transition or NULL
 6260:  * @counter: the counter associated to that transition
 6261:  *
 6262:  * If @to is NULL, this creates first a new target state in the automata
 6263:  * and then adds an epsilon transition from the @from state to the target state
 6264:  * which will be allowed only if the counter is within the right range.
 6265:  *
 6266:  * Returns the target state or NULL in case of error
 6267:  */
 6268: xmlAutomataStatePtr
 6269: xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6270: 		xmlAutomataStatePtr to, int counter) {
 6271:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6272: 	return(NULL);
 6273:     xmlFAGenerateCountedTransition(am, from, to, counter);
 6274:     if (to == NULL)
 6275: 	return(am->state);
 6276:     return(to);
 6277: }
 6278: 
 6279: /**
 6280:  * xmlAutomataCompile:
 6281:  * @am: an automata
 6282:  *
 6283:  * Compile the automata into a Reg Exp ready for being executed.
 6284:  * The automata should be free after this point.
 6285:  *
 6286:  * Returns the compiled regexp or NULL in case of error
 6287:  */
 6288: xmlRegexpPtr
 6289: xmlAutomataCompile(xmlAutomataPtr am) {
 6290:     xmlRegexpPtr ret;
 6291: 
 6292:     if ((am == NULL) || (am->error != 0)) return(NULL);
 6293:     xmlFAEliminateEpsilonTransitions(am);
 6294:     /* xmlFAComputesDeterminism(am); */
 6295:     ret = xmlRegEpxFromParse(am);
 6296: 
 6297:     return(ret);
 6298: }
 6299: 
 6300: /**
 6301:  * xmlAutomataIsDeterminist:
 6302:  * @am: an automata
 6303:  *
 6304:  * Checks if an automata is determinist.
 6305:  *
 6306:  * Returns 1 if true, 0 if not, and -1 in case of error
 6307:  */
 6308: int
 6309: xmlAutomataIsDeterminist(xmlAutomataPtr am) {
 6310:     int ret;
 6311: 
 6312:     if (am == NULL)
 6313: 	return(-1);
 6314: 
 6315:     ret = xmlFAComputesDeterminism(am);
 6316:     return(ret);
 6317: }
 6318: #endif /* LIBXML_AUTOMATA_ENABLED */
 6319: 
 6320: #ifdef LIBXML_EXPR_ENABLED
 6321: /************************************************************************
 6322:  *									*
 6323:  *		Formal Expression handling code				*
 6324:  *									*
 6325:  ************************************************************************/
 6326: /************************************************************************
 6327:  *									*
 6328:  *		Expression handling context				*
 6329:  *									*
 6330:  ************************************************************************/
 6331: 
 6332: struct _xmlExpCtxt {
 6333:     xmlDictPtr dict;
 6334:     xmlExpNodePtr *table;
 6335:     int size;
 6336:     int nbElems;
 6337:     int nb_nodes;
 6338:     int maxNodes;
 6339:     const char *expr;
 6340:     const char *cur;
 6341:     int nb_cons;
 6342:     int tabSize;
 6343: };
 6344: 
 6345: /**
 6346:  * xmlExpNewCtxt:
 6347:  * @maxNodes:  the maximum number of nodes
 6348:  * @dict:  optional dictionnary to use internally
 6349:  *
 6350:  * Creates a new context for manipulating expressions
 6351:  *
 6352:  * Returns the context or NULL in case of error
 6353:  */
 6354: xmlExpCtxtPtr
 6355: xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
 6356:     xmlExpCtxtPtr ret;
 6357:     int size = 256;
 6358: 
 6359:     if (maxNodes <= 4096)
 6360:         maxNodes = 4096;
 6361: 
 6362:     ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
 6363:     if (ret == NULL)
 6364:         return(NULL);
 6365:     memset(ret, 0, sizeof(xmlExpCtxt));
 6366:     ret->size = size;
 6367:     ret->nbElems = 0;
 6368:     ret->maxNodes = maxNodes;
 6369:     ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
 6370:     if (ret->table == NULL) {
 6371:         xmlFree(ret);
 6372: 	return(NULL);
 6373:     }
 6374:     memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
 6375:     if (dict == NULL) {
 6376:         ret->dict = xmlDictCreate();
 6377: 	if (ret->dict == NULL) {
 6378: 	    xmlFree(ret->table);
 6379: 	    xmlFree(ret);
 6380: 	    return(NULL);
 6381: 	}
 6382:     } else {
 6383:         ret->dict = dict;
 6384: 	xmlDictReference(ret->dict);
 6385:     }
 6386:     return(ret);
 6387: }
 6388: 
 6389: /**
 6390:  * xmlExpFreeCtxt:
 6391:  * @ctxt:  an expression context
 6392:  *
 6393:  * Free an expression context
 6394:  */
 6395: void
 6396: xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
 6397:     if (ctxt == NULL)
 6398:         return;
 6399:     xmlDictFree(ctxt->dict);
 6400:     if (ctxt->table != NULL)
 6401: 	xmlFree(ctxt->table);
 6402:     xmlFree(ctxt);
 6403: }
 6404: 
 6405: /************************************************************************
 6406:  *									*
 6407:  *		Structure associated to an expression node		*
 6408:  *									*
 6409:  ************************************************************************/
 6410: #define MAX_NODES 10000
 6411: 
 6412: /* #define DEBUG_DERIV */
 6413: 
 6414: /*
 6415:  * TODO:
 6416:  * - Wildcards
 6417:  * - public API for creation
 6418:  *
 6419:  * Started
 6420:  * - regression testing
 6421:  *
 6422:  * Done
 6423:  * - split into module and test tool
 6424:  * - memleaks
 6425:  */
 6426: 
 6427: typedef enum {
 6428:     XML_EXP_NILABLE = (1 << 0)
 6429: } xmlExpNodeInfo;
 6430: 
 6431: #define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
 6432: 
 6433: struct _xmlExpNode {
 6434:     unsigned char type;/* xmlExpNodeType */
 6435:     unsigned char info;/* OR of xmlExpNodeInfo */
 6436:     unsigned short key;	/* the hash key */
 6437:     unsigned int ref;	/* The number of references */
 6438:     int c_max;		/* the maximum length it can consume */
 6439:     xmlExpNodePtr exp_left;
 6440:     xmlExpNodePtr next;/* the next node in the hash table or free list */
 6441:     union {
 6442: 	struct {
 6443: 	    int f_min;
 6444: 	    int f_max;
 6445: 	} count;
 6446: 	struct {
 6447: 	    xmlExpNodePtr f_right;
 6448: 	} children;
 6449:         const xmlChar *f_str;
 6450:     } field;
 6451: };
 6452: 
 6453: #define exp_min field.count.f_min
 6454: #define exp_max field.count.f_max
 6455: /* #define exp_left field.children.f_left */
 6456: #define exp_right field.children.f_right
 6457: #define exp_str field.f_str
 6458: 
 6459: static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
 6460: static xmlExpNode forbiddenExpNode = {
 6461:     XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
 6462: };
 6463: xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
 6464: static xmlExpNode emptyExpNode = {
 6465:     XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
 6466: };
 6467: xmlExpNodePtr emptyExp = &emptyExpNode;
 6468: 
 6469: /************************************************************************
 6470:  *									*
 6471:  *  The custom hash table for unicity and canonicalization		*
 6472:  *  of sub-expressions pointers						*
 6473:  *									*
 6474:  ************************************************************************/
 6475: /*
 6476:  * xmlExpHashNameComputeKey:
 6477:  * Calculate the hash key for a token
 6478:  */
 6479: static unsigned short
 6480: xmlExpHashNameComputeKey(const xmlChar *name) {
 6481:     unsigned short value = 0L;
 6482:     char ch;
 6483: 
 6484:     if (name != NULL) {
 6485: 	value += 30 * (*name);
 6486: 	while ((ch = *name++) != 0) {
 6487: 	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
 6488: 	}
 6489:     }
 6490:     return (value);
 6491: }
 6492: 
 6493: /*
 6494:  * xmlExpHashComputeKey:
 6495:  * Calculate the hash key for a compound expression
 6496:  */
 6497: static unsigned short
 6498: xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
 6499:                      xmlExpNodePtr right) {
 6500:     unsigned long value;
 6501:     unsigned short ret;
 6502: 
 6503:     switch (type) {
 6504:         case XML_EXP_SEQ:
 6505: 	    value = left->key;
 6506: 	    value += right->key;
 6507: 	    value *= 3;
 6508: 	    ret = (unsigned short) value;
 6509: 	    break;
 6510:         case XML_EXP_OR:
 6511: 	    value = left->key;
 6512: 	    value += right->key;
 6513: 	    value *= 7;
 6514: 	    ret = (unsigned short) value;
 6515: 	    break;
 6516:         case XML_EXP_COUNT:
 6517: 	    value = left->key;
 6518: 	    value += right->key;
 6519: 	    ret = (unsigned short) value;
 6520: 	    break;
 6521: 	default:
 6522: 	    ret = 0;
 6523:     }
 6524:     return(ret);
 6525: }
 6526: 
 6527: 
 6528: static xmlExpNodePtr
 6529: xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
 6530:     xmlExpNodePtr ret;
 6531: 
 6532:     if (ctxt->nb_nodes >= MAX_NODES)
 6533:         return(NULL);
 6534:     ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
 6535:     if (ret == NULL)
 6536:         return(NULL);
 6537:     memset(ret, 0, sizeof(xmlExpNode));
 6538:     ret->type = type;
 6539:     ret->next = NULL;
 6540:     ctxt->nb_nodes++;
 6541:     ctxt->nb_cons++;
 6542:     return(ret);
 6543: }
 6544: 
 6545: /**
 6546:  * xmlExpHashGetEntry:
 6547:  * @table: the hash table
 6548:  *
 6549:  * Get the unique entry from the hash table. The entry is created if
 6550:  * needed. @left and @right are consumed, i.e. their ref count will
 6551:  * be decremented by the operation.
 6552:  *
 6553:  * Returns the pointer or NULL in case of error
 6554:  */
 6555: static xmlExpNodePtr
 6556: xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
 6557:                    xmlExpNodePtr left, xmlExpNodePtr right,
 6558: 		   const xmlChar *name, int min, int max) {
 6559:     unsigned short kbase, key;
 6560:     xmlExpNodePtr entry;
 6561:     xmlExpNodePtr insert;
 6562: 
 6563:     if (ctxt == NULL)
 6564: 	return(NULL);
 6565: 
 6566:     /*
 6567:      * Check for duplicate and insertion location.
 6568:      */
 6569:     if (type == XML_EXP_ATOM) {
 6570: 	kbase = xmlExpHashNameComputeKey(name);
 6571:     } else if (type == XML_EXP_COUNT) {
 6572:         /* COUNT reduction rule 1 */
 6573: 	/* a{1} -> a */
 6574: 	if (min == max) {
 6575: 	    if (min == 1) {
 6576: 		return(left);
 6577: 	    }
 6578: 	    if (min == 0) {
 6579: 		xmlExpFree(ctxt, left);
 6580: 	        return(emptyExp);
 6581: 	    }
 6582: 	}
 6583: 	if (min < 0) {
 6584: 	    xmlExpFree(ctxt, left);
 6585: 	    return(forbiddenExp);
 6586: 	}
 6587:         if (max == -1)
 6588: 	    kbase = min + 79;
 6589: 	else
 6590: 	    kbase = max - min;
 6591: 	kbase += left->key;
 6592:     } else if (type == XML_EXP_OR) {
 6593:         /* Forbid reduction rules */
 6594:         if (left->type == XML_EXP_FORBID) {
 6595: 	    xmlExpFree(ctxt, left);
 6596: 	    return(right);
 6597: 	}
 6598:         if (right->type == XML_EXP_FORBID) {
 6599: 	    xmlExpFree(ctxt, right);
 6600: 	    return(left);
 6601: 	}
 6602: 
 6603:         /* OR reduction rule 1 */
 6604: 	/* a | a reduced to a */
 6605:         if (left == right) {
 6606: 	    left->ref--;
 6607: 	    return(left);
 6608: 	}
 6609:         /* OR canonicalization rule 1 */
 6610: 	/* linearize (a | b) | c into a | (b | c) */
 6611:         if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
 6612: 	    xmlExpNodePtr tmp = left;
 6613:             left = right;
 6614: 	    right = tmp;
 6615: 	}
 6616:         /* OR reduction rule 2 */
 6617: 	/* a | (a | b) and b | (a | b) are reduced to a | b */
 6618:         if (right->type == XML_EXP_OR) {
 6619: 	    if ((left == right->exp_left) ||
 6620: 	        (left == right->exp_right)) {
 6621: 		xmlExpFree(ctxt, left);
 6622: 		return(right);
 6623: 	    }
 6624: 	}
 6625:         /* OR canonicalization rule 2 */
 6626: 	/* linearize (a | b) | c into a | (b | c) */
 6627:         if (left->type == XML_EXP_OR) {
 6628: 	    xmlExpNodePtr tmp;
 6629: 
 6630: 	    /* OR canonicalization rule 2 */
 6631: 	    if ((left->exp_right->type != XML_EXP_OR) &&
 6632: 	        (left->exp_right->key < left->exp_left->key)) {
 6633: 	        tmp = left->exp_right;
 6634: 		left->exp_right = left->exp_left;
 6635: 		left->exp_left = tmp;
 6636: 	    }
 6637: 	    left->exp_right->ref++;
 6638: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
 6639: 	                             NULL, 0, 0);
 6640: 	    left->exp_left->ref++;
 6641: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
 6642: 	                             NULL, 0, 0);
 6643: 
 6644: 	    xmlExpFree(ctxt, left);
 6645: 	    return(tmp);
 6646: 	}
 6647: 	if (right->type == XML_EXP_OR) {
 6648: 	    /* Ordering in the tree */
 6649: 	    /* C | (A | B) -> A | (B | C) */
 6650: 	    if (left->key > right->exp_right->key) {
 6651: 		xmlExpNodePtr tmp;
 6652: 		right->exp_right->ref++;
 6653: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
 6654: 		                         left, NULL, 0, 0);
 6655: 		right->exp_left->ref++;
 6656: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6657: 		                         tmp, NULL, 0, 0);
 6658: 		xmlExpFree(ctxt, right);
 6659: 		return(tmp);
 6660: 	    }
 6661: 	    /* Ordering in the tree */
 6662: 	    /* B | (A | C) -> A | (B | C) */
 6663: 	    if (left->key > right->exp_left->key) {
 6664: 		xmlExpNodePtr tmp;
 6665: 		right->exp_right->ref++;
 6666: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
 6667: 		                         right->exp_right, NULL, 0, 0);
 6668: 		right->exp_left->ref++;
 6669: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6670: 		                         tmp, NULL, 0, 0);
 6671: 		xmlExpFree(ctxt, right);
 6672: 		return(tmp);
 6673: 	    }
 6674: 	}
 6675: 	/* we know both types are != XML_EXP_OR here */
 6676:         else if (left->key > right->key) {
 6677: 	    xmlExpNodePtr tmp = left;
 6678:             left = right;
 6679: 	    right = tmp;
 6680: 	}
 6681: 	kbase = xmlExpHashComputeKey(type, left, right);
 6682:     } else if (type == XML_EXP_SEQ) {
 6683:         /* Forbid reduction rules */
 6684:         if (left->type == XML_EXP_FORBID) {
 6685: 	    xmlExpFree(ctxt, right);
 6686: 	    return(left);
 6687: 	}
 6688:         if (right->type == XML_EXP_FORBID) {
 6689: 	    xmlExpFree(ctxt, left);
 6690: 	    return(right);
 6691: 	}
 6692:         /* Empty reduction rules */
 6693:         if (right->type == XML_EXP_EMPTY) {
 6694: 	    return(left);
 6695: 	}
 6696:         if (left->type == XML_EXP_EMPTY) {
 6697: 	    return(right);
 6698: 	}
 6699: 	kbase = xmlExpHashComputeKey(type, left, right);
 6700:     } else
 6701:         return(NULL);
 6702: 
 6703:     key = kbase % ctxt->size;
 6704:     if (ctxt->table[key] != NULL) {
 6705: 	for (insert = ctxt->table[key]; insert != NULL;
 6706: 	     insert = insert->next) {
 6707: 	    if ((insert->key == kbase) &&
 6708: 	        (insert->type == type)) {
 6709: 		if (type == XML_EXP_ATOM) {
 6710: 		    if (name == insert->exp_str) {
 6711: 			insert->ref++;
 6712: 			return(insert);
 6713: 		    }
 6714: 		} else if (type == XML_EXP_COUNT) {
 6715: 		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
 6716: 		        (insert->exp_left == left)) {
 6717: 			insert->ref++;
 6718: 			left->ref--;
 6719: 			return(insert);
 6720: 		    }
 6721: 		} else if ((insert->exp_left == left) &&
 6722: 			   (insert->exp_right == right)) {
 6723: 		    insert->ref++;
 6724: 		    left->ref--;
 6725: 		    right->ref--;
 6726: 		    return(insert);
 6727: 		}
 6728: 	    }
 6729: 	}
 6730:     }
 6731: 
 6732:     entry = xmlExpNewNode(ctxt, type);
 6733:     if (entry == NULL)
 6734:         return(NULL);
 6735:     entry->key = kbase;
 6736:     if (type == XML_EXP_ATOM) {
 6737: 	entry->exp_str = name;
 6738: 	entry->c_max = 1;
 6739:     } else if (type == XML_EXP_COUNT) {
 6740:         entry->exp_min = min;
 6741:         entry->exp_max = max;
 6742: 	entry->exp_left = left;
 6743: 	if ((min == 0) || (IS_NILLABLE(left)))
 6744: 	    entry->info |= XML_EXP_NILABLE;
 6745: 	if (max < 0)
 6746: 	    entry->c_max = -1;
 6747: 	else
 6748: 	    entry->c_max = max * entry->exp_left->c_max;
 6749:     } else {
 6750: 	entry->exp_left = left;
 6751: 	entry->exp_right = right;
 6752: 	if (type == XML_EXP_OR) {
 6753: 	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
 6754: 		entry->info |= XML_EXP_NILABLE;
 6755: 	    if ((entry->exp_left->c_max == -1) ||
 6756: 	        (entry->exp_right->c_max == -1))
 6757: 		entry->c_max = -1;
 6758: 	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
 6759: 	        entry->c_max = entry->exp_left->c_max;
 6760: 	    else
 6761: 	        entry->c_max = entry->exp_right->c_max;
 6762: 	} else {
 6763: 	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
 6764: 		entry->info |= XML_EXP_NILABLE;
 6765: 	    if ((entry->exp_left->c_max == -1) ||
 6766: 	        (entry->exp_right->c_max == -1))
 6767: 		entry->c_max = -1;
 6768: 	    else
 6769: 	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
 6770: 	}
 6771:     }
 6772:     entry->ref = 1;
 6773:     if (ctxt->table[key] != NULL)
 6774:         entry->next = ctxt->table[key];
 6775: 
 6776:     ctxt->table[key] = entry;
 6777:     ctxt->nbElems++;
 6778: 
 6779:     return(entry);
 6780: }
 6781: 
 6782: /**
 6783:  * xmlExpFree:
 6784:  * @ctxt: the expression context
 6785:  * @exp: the expression
 6786:  *
 6787:  * Dereference the expression
 6788:  */
 6789: void
 6790: xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
 6791:     if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
 6792:         return;
 6793:     exp->ref--;
 6794:     if (exp->ref == 0) {
 6795:         unsigned short key;
 6796: 
 6797:         /* Unlink it first from the hash table */
 6798: 	key = exp->key % ctxt->size;
 6799: 	if (ctxt->table[key] == exp) {
 6800: 	    ctxt->table[key] = exp->next;
 6801: 	} else {
 6802: 	    xmlExpNodePtr tmp;
 6803: 
 6804: 	    tmp = ctxt->table[key];
 6805: 	    while (tmp != NULL) {
 6806: 	        if (tmp->next == exp) {
 6807: 		    tmp->next = exp->next;
 6808: 		    break;
 6809: 		}
 6810: 	        tmp = tmp->next;
 6811: 	    }
 6812: 	}
 6813: 
 6814:         if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
 6815: 	    xmlExpFree(ctxt, exp->exp_left);
 6816: 	    xmlExpFree(ctxt, exp->exp_right);
 6817: 	} else if (exp->type == XML_EXP_COUNT) {
 6818: 	    xmlExpFree(ctxt, exp->exp_left);
 6819: 	}
 6820:         xmlFree(exp);
 6821: 	ctxt->nb_nodes--;
 6822:     }
 6823: }
 6824: 
 6825: /**
 6826:  * xmlExpRef:
 6827:  * @exp: the expression
 6828:  *
 6829:  * Increase the reference count of the expression
 6830:  */
 6831: void
 6832: xmlExpRef(xmlExpNodePtr exp) {
 6833:     if (exp != NULL)
 6834:         exp->ref++;
 6835: }
 6836: 
 6837: /**
 6838:  * xmlExpNewAtom:
 6839:  * @ctxt: the expression context
 6840:  * @name: the atom name
 6841:  * @len: the atom name length in byte (or -1);
 6842:  *
 6843:  * Get the atom associated to this name from that context
 6844:  *
 6845:  * Returns the node or NULL in case of error
 6846:  */
 6847: xmlExpNodePtr
 6848: xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
 6849:     if ((ctxt == NULL) || (name == NULL))
 6850:         return(NULL);
 6851:     name = xmlDictLookup(ctxt->dict, name, len);
 6852:     if (name == NULL)
 6853:         return(NULL);
 6854:     return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
 6855: }
 6856: 
 6857: /**
 6858:  * xmlExpNewOr:
 6859:  * @ctxt: the expression context
 6860:  * @left: left expression
 6861:  * @right: right expression
 6862:  *
 6863:  * Get the atom associated to the choice @left | @right
 6864:  * Note that @left and @right are consumed in the operation, to keep
 6865:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6866:  * this is true even in case of failure (unless ctxt == NULL).
 6867:  *
 6868:  * Returns the node or NULL in case of error
 6869:  */
 6870: xmlExpNodePtr
 6871: xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6872:     if (ctxt == NULL)
 6873:         return(NULL);
 6874:     if ((left == NULL) || (right == NULL)) {
 6875:         xmlExpFree(ctxt, left);
 6876:         xmlExpFree(ctxt, right);
 6877:         return(NULL);
 6878:     }
 6879:     return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
 6880: }
 6881: 
 6882: /**
 6883:  * xmlExpNewSeq:
 6884:  * @ctxt: the expression context
 6885:  * @left: left expression
 6886:  * @right: right expression
 6887:  *
 6888:  * Get the atom associated to the sequence @left , @right
 6889:  * Note that @left and @right are consumed in the operation, to keep
 6890:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6891:  * this is true even in case of failure (unless ctxt == NULL).
 6892:  *
 6893:  * Returns the node or NULL in case of error
 6894:  */
 6895: xmlExpNodePtr
 6896: xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6897:     if (ctxt == NULL)
 6898:         return(NULL);
 6899:     if ((left == NULL) || (right == NULL)) {
 6900:         xmlExpFree(ctxt, left);
 6901:         xmlExpFree(ctxt, right);
 6902:         return(NULL);
 6903:     }
 6904:     return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
 6905: }
 6906: 
 6907: /**
 6908:  * xmlExpNewRange:
 6909:  * @ctxt: the expression context
 6910:  * @subset: the expression to be repeated
 6911:  * @min: the lower bound for the repetition
 6912:  * @max: the upper bound for the repetition, -1 means infinite
 6913:  *
 6914:  * Get the atom associated to the range (@subset){@min, @max}
 6915:  * Note that @subset is consumed in the operation, to keep
 6916:  * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
 6917:  * this is true even in case of failure (unless ctxt == NULL).
 6918:  *
 6919:  * Returns the node or NULL in case of error
 6920:  */
 6921: xmlExpNodePtr
 6922: xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
 6923:     if (ctxt == NULL)
 6924:         return(NULL);
 6925:     if ((subset == NULL) || (min < 0) || (max < -1) ||
 6926:         ((max >= 0) && (min > max))) {
 6927: 	xmlExpFree(ctxt, subset);
 6928:         return(NULL);
 6929:     }
 6930:     return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
 6931:                               NULL, NULL, min, max));
 6932: }
 6933: 
 6934: /************************************************************************
 6935:  *									*
 6936:  *		Public API for operations on expressions		*
 6937:  *									*
 6938:  ************************************************************************/
 6939: 
 6940: static int
 6941: xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 6942:                      const xmlChar**list, int len, int nb) {
 6943:     int tmp, tmp2;
 6944: tail:
 6945:     switch (exp->type) {
 6946:         case XML_EXP_EMPTY:
 6947: 	    return(0);
 6948:         case XML_EXP_ATOM:
 6949: 	    for (tmp = 0;tmp < nb;tmp++)
 6950: 	        if (list[tmp] == exp->exp_str)
 6951: 		    return(0);
 6952:             if (nb >= len)
 6953: 	        return(-2);
 6954: 	    list[nb] = exp->exp_str;
 6955: 	    return(1);
 6956:         case XML_EXP_COUNT:
 6957: 	    exp = exp->exp_left;
 6958: 	    goto tail;
 6959:         case XML_EXP_SEQ:
 6960:         case XML_EXP_OR:
 6961: 	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
 6962: 	    if (tmp < 0)
 6963: 	        return(tmp);
 6964: 	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
 6965: 	                                nb + tmp);
 6966: 	    if (tmp2 < 0)
 6967: 	        return(tmp2);
 6968:             return(tmp + tmp2);
 6969:     }
 6970:     return(-1);
 6971: }
 6972: 
 6973: /**
 6974:  * xmlExpGetLanguage:
 6975:  * @ctxt: the expression context
 6976:  * @exp: the expression
 6977:  * @langList: where to store the tokens
 6978:  * @len: the allocated length of @list
 6979:  *
 6980:  * Find all the strings used in @exp and store them in @list
 6981:  *
 6982:  * Returns the number of unique strings found, -1 in case of errors and
 6983:  *         -2 if there is more than @len strings
 6984:  */
 6985: int
 6986: xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 6987:                   const xmlChar**langList, int len) {
 6988:     if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
 6989:         return(-1);
 6990:     return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
 6991: }
 6992: 
 6993: static int
 6994: xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 6995:                   const xmlChar**list, int len, int nb) {
 6996:     int tmp, tmp2;
 6997: tail:
 6998:     switch (exp->type) {
 6999:         case XML_EXP_FORBID:
 7000: 	    return(0);
 7001:         case XML_EXP_EMPTY:
 7002: 	    return(0);
 7003:         case XML_EXP_ATOM:
 7004: 	    for (tmp = 0;tmp < nb;tmp++)
 7005: 	        if (list[tmp] == exp->exp_str)
 7006: 		    return(0);
 7007:             if (nb >= len)
 7008: 	        return(-2);
 7009: 	    list[nb] = exp->exp_str;
 7010: 	    return(1);
 7011:         case XML_EXP_COUNT:
 7012: 	    exp = exp->exp_left;
 7013: 	    goto tail;
 7014:         case XML_EXP_SEQ:
 7015: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7016: 	    if (tmp < 0)
 7017: 	        return(tmp);
 7018: 	    if (IS_NILLABLE(exp->exp_left)) {
 7019: 		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7020: 					    nb + tmp);
 7021: 		if (tmp2 < 0)
 7022: 		    return(tmp2);
 7023: 		tmp += tmp2;
 7024: 	    }
 7025:             return(tmp);
 7026:         case XML_EXP_OR:
 7027: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7028: 	    if (tmp < 0)
 7029: 	        return(tmp);
 7030: 	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7031: 	                                nb + tmp);
 7032: 	    if (tmp2 < 0)
 7033: 	        return(tmp2);
 7034:             return(tmp + tmp2);
 7035:     }
 7036:     return(-1);
 7037: }
 7038: 
 7039: /**
 7040:  * xmlExpGetStart:
 7041:  * @ctxt: the expression context
 7042:  * @exp: the expression
 7043:  * @tokList: where to store the tokens
 7044:  * @len: the allocated length of @list
 7045:  *
 7046:  * Find all the strings that appears at the start of the languages
 7047:  * accepted by @exp and store them in @list. E.g. for (a, b) | c
 7048:  * it will return the list [a, c]
 7049:  *
 7050:  * Returns the number of unique strings found, -1 in case of errors and
 7051:  *         -2 if there is more than @len strings
 7052:  */
 7053: int
 7054: xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7055:                const xmlChar**tokList, int len) {
 7056:     if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
 7057:         return(-1);
 7058:     return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
 7059: }
 7060: 
 7061: /**
 7062:  * xmlExpIsNillable:
 7063:  * @exp: the expression
 7064:  *
 7065:  * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
 7066:  *
 7067:  * Returns 1 if nillable, 0 if not and -1 in case of error
 7068:  */
 7069: int
 7070: xmlExpIsNillable(xmlExpNodePtr exp) {
 7071:     if (exp == NULL)
 7072:         return(-1);
 7073:     return(IS_NILLABLE(exp) != 0);
 7074: }
 7075: 
 7076: static xmlExpNodePtr
 7077: xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
 7078: {
 7079:     xmlExpNodePtr ret;
 7080: 
 7081:     switch (exp->type) {
 7082: 	case XML_EXP_EMPTY:
 7083: 	    return(forbiddenExp);
 7084: 	case XML_EXP_FORBID:
 7085: 	    return(forbiddenExp);
 7086: 	case XML_EXP_ATOM:
 7087: 	    if (exp->exp_str == str) {
 7088: #ifdef DEBUG_DERIV
 7089: 		printf("deriv atom: equal => Empty\n");
 7090: #endif
 7091: 	        ret = emptyExp;
 7092: 	    } else {
 7093: #ifdef DEBUG_DERIV
 7094: 		printf("deriv atom: mismatch => forbid\n");
 7095: #endif
 7096: 	        /* TODO wildcards here */
 7097: 		ret = forbiddenExp;
 7098: 	    }
 7099: 	    return(ret);
 7100: 	case XML_EXP_OR: {
 7101: 	    xmlExpNodePtr tmp;
 7102: 
 7103: #ifdef DEBUG_DERIV
 7104: 	    printf("deriv or: => or(derivs)\n");
 7105: #endif
 7106: 	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7107: 	    if (tmp == NULL) {
 7108: 		return(NULL);
 7109: 	    }
 7110: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7111: 	    if (ret == NULL) {
 7112: 	        xmlExpFree(ctxt, tmp);
 7113: 		return(NULL);
 7114: 	    }
 7115:             ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
 7116: 			     NULL, 0, 0);
 7117: 	    return(ret);
 7118: 	}
 7119: 	case XML_EXP_SEQ:
 7120: #ifdef DEBUG_DERIV
 7121: 	    printf("deriv seq: starting with left\n");
 7122: #endif
 7123: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7124: 	    if (ret == NULL) {
 7125: 	        return(NULL);
 7126: 	    } else if (ret == forbiddenExp) {
 7127: 	        if (IS_NILLABLE(exp->exp_left)) {
 7128: #ifdef DEBUG_DERIV
 7129: 		    printf("deriv seq: left failed but nillable\n");
 7130: #endif
 7131: 		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7132: 		}
 7133: 	    } else {
 7134: #ifdef DEBUG_DERIV
 7135: 		printf("deriv seq: left match => sequence\n");
 7136: #endif
 7137: 	        exp->exp_right->ref++;
 7138: 	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
 7139: 		                         NULL, 0, 0);
 7140: 	    }
 7141: 	    return(ret);
 7142: 	case XML_EXP_COUNT: {
 7143: 	    int min, max;
 7144: 	    xmlExpNodePtr tmp;
 7145: 
 7146: 	    if (exp->exp_max == 0)
 7147: 		return(forbiddenExp);
 7148: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7149: 	    if (ret == NULL)
 7150: 	        return(NULL);
 7151: 	    if (ret == forbiddenExp) {
 7152: #ifdef DEBUG_DERIV
 7153: 		printf("deriv count: pattern mismatch => forbid\n");
 7154: #endif
 7155: 	        return(ret);
 7156: 	    }
 7157: 	    if (exp->exp_max == 1)
 7158: 		return(ret);
 7159: 	    if (exp->exp_max < 0) /* unbounded */
 7160: 		max = -1;
 7161: 	    else
 7162: 		max = exp->exp_max - 1;
 7163: 	    if (exp->exp_min > 0)
 7164: 		min = exp->exp_min - 1;
 7165: 	    else
 7166: 		min = 0;
 7167: 	    exp->exp_left->ref++;
 7168: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
 7169: 				     NULL, min, max);
 7170: 	    if (ret == emptyExp) {
 7171: #ifdef DEBUG_DERIV
 7172: 		printf("deriv count: match to empty => new count\n");
 7173: #endif
 7174: 	        return(tmp);
 7175: 	    }
 7176: #ifdef DEBUG_DERIV
 7177: 	    printf("deriv count: match => sequence with new count\n");
 7178: #endif
 7179: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
 7180: 	                              NULL, 0, 0));
 7181: 	}
 7182:     }
 7183:     return(NULL);
 7184: }
 7185: 
 7186: /**
 7187:  * xmlExpStringDerive:
 7188:  * @ctxt: the expression context
 7189:  * @exp: the expression
 7190:  * @str: the string
 7191:  * @len: the string len in bytes if available
 7192:  *
 7193:  * Do one step of Brzozowski derivation of the expression @exp with
 7194:  * respect to the input string
 7195:  *
 7196:  * Returns the resulting expression or NULL in case of internal error
 7197:  */
 7198: xmlExpNodePtr
 7199: xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7200:                    const xmlChar *str, int len) {
 7201:     const xmlChar *input;
 7202: 
 7203:     if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
 7204:         return(NULL);
 7205:     }
 7206:     /*
 7207:      * check the string is in the dictionnary, if yes use an interned
 7208:      * copy, otherwise we know it's not an acceptable input
 7209:      */
 7210:     input = xmlDictExists(ctxt->dict, str, len);
 7211:     if (input == NULL) {
 7212:         return(forbiddenExp);
 7213:     }
 7214:     return(xmlExpStringDeriveInt(ctxt, exp, input));
 7215: }
 7216: 
 7217: static int
 7218: xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7219:     int ret = 1;
 7220: 
 7221:     if (sub->c_max == -1) {
 7222:         if (exp->c_max != -1)
 7223: 	    ret = 0;
 7224:     } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
 7225:         ret = 0;
 7226:     }
 7227: #if 0
 7228:     if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
 7229:         ret = 0;
 7230: #endif
 7231:     return(ret);
 7232: }
 7233: 
 7234: static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7235:                                         xmlExpNodePtr sub);
 7236: /**
 7237:  * xmlExpDivide:
 7238:  * @ctxt: the expressions context
 7239:  * @exp: the englobing expression
 7240:  * @sub: the subexpression
 7241:  * @mult: the multiple expression
 7242:  * @remain: the remain from the derivation of the multiple
 7243:  *
 7244:  * Check if exp is a multiple of sub, i.e. if there is a finite number n
 7245:  * so that sub{n} subsume exp
 7246:  *
 7247:  * Returns the multiple value if successful, 0 if it is not a multiple
 7248:  *         and -1 in case of internel error.
 7249:  */
 7250: 
 7251: static int
 7252: xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
 7253:              xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
 7254:     int i;
 7255:     xmlExpNodePtr tmp, tmp2;
 7256: 
 7257:     if (mult != NULL) *mult = NULL;
 7258:     if (remain != NULL) *remain = NULL;
 7259:     if (exp->c_max == -1) return(0);
 7260:     if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
 7261: 
 7262:     for (i = 1;i <= exp->c_max;i++) {
 7263:         sub->ref++;
 7264:         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7265: 				 sub, NULL, NULL, i, i);
 7266: 	if (tmp == NULL) {
 7267: 	    return(-1);
 7268: 	}
 7269: 	if (!xmlExpCheckCard(tmp, exp)) {
 7270: 	    xmlExpFree(ctxt, tmp);
 7271: 	    continue;
 7272: 	}
 7273: 	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
 7274: 	if (tmp2 == NULL) {
 7275: 	    xmlExpFree(ctxt, tmp);
 7276: 	    return(-1);
 7277: 	}
 7278: 	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
 7279: 	    if (remain != NULL)
 7280: 	        *remain = tmp2;
 7281: 	    else
 7282: 	        xmlExpFree(ctxt, tmp2);
 7283: 	    if (mult != NULL)
 7284: 	        *mult = tmp;
 7285: 	    else
 7286: 	        xmlExpFree(ctxt, tmp);
 7287: #ifdef DEBUG_DERIV
 7288: 	    printf("Divide succeeded %d\n", i);
 7289: #endif
 7290: 	    return(i);
 7291: 	}
 7292: 	xmlExpFree(ctxt, tmp);
 7293: 	xmlExpFree(ctxt, tmp2);
 7294:     }
 7295: #ifdef DEBUG_DERIV
 7296:     printf("Divide failed\n");
 7297: #endif
 7298:     return(0);
 7299: }
 7300: 
 7301: /**
 7302:  * xmlExpExpDeriveInt:
 7303:  * @ctxt: the expressions context
 7304:  * @exp: the englobing expression
 7305:  * @sub: the subexpression
 7306:  *
 7307:  * Try to do a step of Brzozowski derivation but at a higher level
 7308:  * the input being a subexpression.
 7309:  *
 7310:  * Returns the resulting expression or NULL in case of internal error
 7311:  */
 7312: static xmlExpNodePtr
 7313: xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7314:     xmlExpNodePtr ret, tmp, tmp2, tmp3;
 7315:     const xmlChar **tab;
 7316:     int len, i;
 7317: 
 7318:     /*
 7319:      * In case of equality and if the expression can only consume a finite
 7320:      * amount, then the derivation is empty
 7321:      */
 7322:     if ((exp == sub) && (exp->c_max >= 0)) {
 7323: #ifdef DEBUG_DERIV
 7324:         printf("Equal(exp, sub) and finite -> Empty\n");
 7325: #endif
 7326:         return(emptyExp);
 7327:     }
 7328:     /*
 7329:      * decompose sub sequence first
 7330:      */
 7331:     if (sub->type == XML_EXP_EMPTY) {
 7332: #ifdef DEBUG_DERIV
 7333:         printf("Empty(sub) -> Empty\n");
 7334: #endif
 7335: 	exp->ref++;
 7336:         return(exp);
 7337:     }
 7338:     if (sub->type == XML_EXP_SEQ) {
 7339: #ifdef DEBUG_DERIV
 7340:         printf("Seq(sub) -> decompose\n");
 7341: #endif
 7342:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7343: 	if (tmp == NULL)
 7344: 	    return(NULL);
 7345: 	if (tmp == forbiddenExp)
 7346: 	    return(tmp);
 7347: 	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
 7348: 	xmlExpFree(ctxt, tmp);
 7349: 	return(ret);
 7350:     }
 7351:     if (sub->type == XML_EXP_OR) {
 7352: #ifdef DEBUG_DERIV
 7353:         printf("Or(sub) -> decompose\n");
 7354: #endif
 7355:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7356: 	if (tmp == forbiddenExp)
 7357: 	    return(tmp);
 7358: 	if (tmp == NULL)
 7359: 	    return(NULL);
 7360: 	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
 7361: 	if ((ret == NULL) || (ret == forbiddenExp)) {
 7362: 	    xmlExpFree(ctxt, tmp);
 7363: 	    return(ret);
 7364: 	}
 7365: 	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
 7366:     }
 7367:     if (!xmlExpCheckCard(exp, sub)) {
 7368: #ifdef DEBUG_DERIV
 7369:         printf("CheckCard(exp, sub) failed -> Forbid\n");
 7370: #endif
 7371:         return(forbiddenExp);
 7372:     }
 7373:     switch (exp->type) {
 7374:         case XML_EXP_EMPTY:
 7375: 	    if (sub == emptyExp)
 7376: 	        return(emptyExp);
 7377: #ifdef DEBUG_DERIV
 7378: 	    printf("Empty(exp) -> Forbid\n");
 7379: #endif
 7380: 	    return(forbiddenExp);
 7381:         case XML_EXP_FORBID:
 7382: #ifdef DEBUG_DERIV
 7383: 	    printf("Forbid(exp) -> Forbid\n");
 7384: #endif
 7385: 	    return(forbiddenExp);
 7386:         case XML_EXP_ATOM:
 7387: 	    if (sub->type == XML_EXP_ATOM) {
 7388: 	        /* TODO: handle wildcards */
 7389: 	        if (exp->exp_str == sub->exp_str) {
 7390: #ifdef DEBUG_DERIV
 7391: 		    printf("Atom match -> Empty\n");
 7392: #endif
 7393: 		    return(emptyExp);
 7394:                 }
 7395: #ifdef DEBUG_DERIV
 7396: 		printf("Atom mismatch -> Forbid\n");
 7397: #endif
 7398: 	        return(forbiddenExp);
 7399: 	    }
 7400: 	    if ((sub->type == XML_EXP_COUNT) &&
 7401: 	        (sub->exp_max == 1) &&
 7402: 	        (sub->exp_left->type == XML_EXP_ATOM)) {
 7403: 	        /* TODO: handle wildcards */
 7404: 	        if (exp->exp_str == sub->exp_left->exp_str) {
 7405: #ifdef DEBUG_DERIV
 7406: 		    printf("Atom match -> Empty\n");
 7407: #endif
 7408: 		    return(emptyExp);
 7409: 		}
 7410: #ifdef DEBUG_DERIV
 7411: 		printf("Atom mismatch -> Forbid\n");
 7412: #endif
 7413: 	        return(forbiddenExp);
 7414: 	    }
 7415: #ifdef DEBUG_DERIV
 7416: 	    printf("Compex exp vs Atom -> Forbid\n");
 7417: #endif
 7418: 	    return(forbiddenExp);
 7419:         case XML_EXP_SEQ:
 7420: 	    /* try to get the sequence consumed only if possible */
 7421: 	    if (xmlExpCheckCard(exp->exp_left, sub)) {
 7422: 		/* See if the sequence can be consumed directly */
 7423: #ifdef DEBUG_DERIV
 7424: 		printf("Seq trying left only\n");
 7425: #endif
 7426: 		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7427: 		if ((ret != forbiddenExp) && (ret != NULL)) {
 7428: #ifdef DEBUG_DERIV
 7429: 		    printf("Seq trying left only worked\n");
 7430: #endif
 7431: 		    /*
 7432: 		     * TODO: assumption here that we are determinist
 7433: 		     *       i.e. we won't get to a nillable exp left
 7434: 		     *       subset which could be matched by the right
 7435: 		     *       part too.
 7436: 		     * e.g.: (a | b)+,(a | c) and 'a+,a'
 7437: 		     */
 7438: 		    exp->exp_right->ref++;
 7439: 		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7440: 					      exp->exp_right, NULL, 0, 0));
 7441: 		}
 7442: #ifdef DEBUG_DERIV
 7443: 	    } else {
 7444: 		printf("Seq: left too short\n");
 7445: #endif
 7446: 	    }
 7447: 	    /* Try instead to decompose */
 7448: 	    if (sub->type == XML_EXP_COUNT) {
 7449: 		int min, max;
 7450: 
 7451: #ifdef DEBUG_DERIV
 7452: 		printf("Seq: sub is a count\n");
 7453: #endif
 7454: 	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7455: 		if (ret == NULL)
 7456: 		    return(NULL);
 7457: 		if (ret != forbiddenExp) {
 7458: #ifdef DEBUG_DERIV
 7459: 		    printf("Seq , Count match on left\n");
 7460: #endif
 7461: 		    if (sub->exp_max < 0)
 7462: 		        max = -1;
 7463: 	            else
 7464: 		        max = sub->exp_max -1;
 7465: 		    if (sub->exp_min > 0)
 7466: 		        min = sub->exp_min -1;
 7467: 		    else
 7468: 		        min = 0;
 7469: 		    exp->exp_right->ref++;
 7470: 		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7471: 		                             exp->exp_right, NULL, 0, 0);
 7472: 		    if (tmp == NULL)
 7473: 		        return(NULL);
 7474: 
 7475: 		    sub->exp_left->ref++;
 7476: 		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7477: 				      sub->exp_left, NULL, NULL, min, max);
 7478: 		    if (tmp2 == NULL) {
 7479: 		        xmlExpFree(ctxt, tmp);
 7480: 			return(NULL);
 7481: 		    }
 7482: 		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7483: 		    xmlExpFree(ctxt, tmp);
 7484: 		    xmlExpFree(ctxt, tmp2);
 7485: 		    return(ret);
 7486: 		}
 7487: 	    }
 7488: 	    /* we made no progress on structured operations */
 7489: 	    break;
 7490:         case XML_EXP_OR:
 7491: #ifdef DEBUG_DERIV
 7492: 	    printf("Or , trying both side\n");
 7493: #endif
 7494: 	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7495: 	    if (ret == NULL)
 7496: 	        return(NULL);
 7497: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
 7498: 	    if (tmp == NULL) {
 7499: 		xmlExpFree(ctxt, ret);
 7500: 	        return(NULL);
 7501: 	    }
 7502: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
 7503:         case XML_EXP_COUNT: {
 7504: 	    int min, max;
 7505: 
 7506: 	    if (sub->type == XML_EXP_COUNT) {
 7507: 	        /*
 7508: 		 * Try to see if the loop is completely subsumed
 7509: 		 */
 7510: 	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7511: 		if (tmp == NULL)
 7512: 		    return(NULL);
 7513: 		if (tmp == forbiddenExp) {
 7514: 		    int mult;
 7515: 
 7516: #ifdef DEBUG_DERIV
 7517: 		    printf("Count, Count inner don't subsume\n");
 7518: #endif
 7519: 		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
 7520: 		                        NULL, &tmp);
 7521: 		    if (mult <= 0) {
 7522: #ifdef DEBUG_DERIV
 7523: 			printf("Count, Count not multiple => forbidden\n");
 7524: #endif
 7525:                         return(forbiddenExp);
 7526: 		    }
 7527: 		    if (sub->exp_max == -1) {
 7528: 		        max = -1;
 7529: 			if (exp->exp_max == -1) {
 7530: 			    if (exp->exp_min <= sub->exp_min * mult)
 7531: 			        min = 0;
 7532: 			    else
 7533: 			        min = exp->exp_min - sub->exp_min * mult;
 7534: 			} else {
 7535: #ifdef DEBUG_DERIV
 7536: 			    printf("Count, Count finite can't subsume infinite\n");
 7537: #endif
 7538:                             xmlExpFree(ctxt, tmp);
 7539: 			    return(forbiddenExp);
 7540: 			}
 7541: 		    } else {
 7542: 			if (exp->exp_max == -1) {
 7543: #ifdef DEBUG_DERIV
 7544: 			    printf("Infinite loop consume mult finite loop\n");
 7545: #endif
 7546: 			    if (exp->exp_min > sub->exp_min * mult) {
 7547: 				max = -1;
 7548: 				min = exp->exp_min - sub->exp_min * mult;
 7549: 			    } else {
 7550: 				max = -1;
 7551: 				min = 0;
 7552: 			    }
 7553: 			} else {
 7554: 			    if (exp->exp_max < sub->exp_max * mult) {
 7555: #ifdef DEBUG_DERIV
 7556: 				printf("loops max mult mismatch => forbidden\n");
 7557: #endif
 7558: 				xmlExpFree(ctxt, tmp);
 7559: 				return(forbiddenExp);
 7560: 			    }
 7561: 			    if (sub->exp_max * mult > exp->exp_min)
 7562: 				min = 0;
 7563: 			    else
 7564: 				min = exp->exp_min - sub->exp_max * mult;
 7565: 			    max = exp->exp_max - sub->exp_max * mult;
 7566: 			}
 7567: 		    }
 7568: 		} else if (!IS_NILLABLE(tmp)) {
 7569: 		    /*
 7570: 		     * TODO: loop here to try to grow if working on finite
 7571: 		     *       blocks.
 7572: 		     */
 7573: #ifdef DEBUG_DERIV
 7574: 		    printf("Count, Count remain not nillable => forbidden\n");
 7575: #endif
 7576: 		    xmlExpFree(ctxt, tmp);
 7577: 		    return(forbiddenExp);
 7578: 		} else if (sub->exp_max == -1) {
 7579: 		    if (exp->exp_max == -1) {
 7580: 		        if (exp->exp_min <= sub->exp_min) {
 7581: #ifdef DEBUG_DERIV
 7582: 			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
 7583: #endif
 7584:                             max = -1;
 7585: 			    min = 0;
 7586: 			} else {
 7587: #ifdef DEBUG_DERIV
 7588: 			    printf("Infinite loops min => Count(X,Inf)\n");
 7589: #endif
 7590:                             max = -1;
 7591: 			    min = exp->exp_min - sub->exp_min;
 7592: 			}
 7593: 		    } else if (exp->exp_min > sub->exp_min) {
 7594: #ifdef DEBUG_DERIV
 7595: 			printf("loops min mismatch 1 => forbidden ???\n");
 7596: #endif
 7597: 		        xmlExpFree(ctxt, tmp);
 7598: 		        return(forbiddenExp);
 7599: 		    } else {
 7600: 			max = -1;
 7601: 			min = 0;
 7602: 		    }
 7603: 		} else {
 7604: 		    if (exp->exp_max == -1) {
 7605: #ifdef DEBUG_DERIV
 7606: 			printf("Infinite loop consume finite loop\n");
 7607: #endif
 7608: 		        if (exp->exp_min > sub->exp_min) {
 7609: 			    max = -1;
 7610: 			    min = exp->exp_min - sub->exp_min;
 7611: 			} else {
 7612: 			    max = -1;
 7613: 			    min = 0;
 7614: 			}
 7615: 		    } else {
 7616: 		        if (exp->exp_max < sub->exp_max) {
 7617: #ifdef DEBUG_DERIV
 7618: 			    printf("loops max mismatch => forbidden\n");
 7619: #endif
 7620: 			    xmlExpFree(ctxt, tmp);
 7621: 			    return(forbiddenExp);
 7622: 			}
 7623: 			if (sub->exp_max > exp->exp_min)
 7624: 			    min = 0;
 7625: 			else
 7626: 			    min = exp->exp_min - sub->exp_max;
 7627: 			max = exp->exp_max - sub->exp_max;
 7628: 		    }
 7629: 		}
 7630: #ifdef DEBUG_DERIV
 7631: 		printf("loops match => SEQ(COUNT())\n");
 7632: #endif
 7633: 		exp->exp_left->ref++;
 7634: 		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7635: 		                          NULL, NULL, min, max);
 7636: 		if (tmp2 == NULL) {
 7637: 		    return(NULL);
 7638: 		}
 7639:                 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7640: 		                         NULL, 0, 0);
 7641: 		return(ret);
 7642: 	    }
 7643: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7644: 	    if (tmp == NULL)
 7645: 		return(NULL);
 7646: 	    if (tmp == forbiddenExp) {
 7647: #ifdef DEBUG_DERIV
 7648: 		printf("loop mismatch => forbidden\n");
 7649: #endif
 7650: 		return(forbiddenExp);
 7651: 	    }
 7652: 	    if (exp->exp_min > 0)
 7653: 		min = exp->exp_min - 1;
 7654: 	    else
 7655: 		min = 0;
 7656: 	    if (exp->exp_max < 0)
 7657: 		max = -1;
 7658: 	    else
 7659: 		max = exp->exp_max - 1;
 7660: 
 7661: #ifdef DEBUG_DERIV
 7662: 	    printf("loop match => SEQ(COUNT())\n");
 7663: #endif
 7664: 	    exp->exp_left->ref++;
 7665: 	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7666: 				      NULL, NULL, min, max);
 7667: 	    if (tmp2 == NULL)
 7668: 		return(NULL);
 7669: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7670: 				     NULL, 0, 0);
 7671: 	    return(ret);
 7672: 	}
 7673:     }
 7674: 
 7675: #ifdef DEBUG_DERIV
 7676:     printf("Fallback to derivative\n");
 7677: #endif
 7678:     if (IS_NILLABLE(sub)) {
 7679:         if (!(IS_NILLABLE(exp)))
 7680: 	    return(forbiddenExp);
 7681: 	else
 7682: 	    ret = emptyExp;
 7683:     } else
 7684: 	ret = NULL;
 7685:     /*
 7686:      * here the structured derivation made no progress so
 7687:      * we use the default token based derivation to force one more step
 7688:      */
 7689:     if (ctxt->tabSize == 0)
 7690:         ctxt->tabSize = 40;
 7691: 
 7692:     tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
 7693: 	                               sizeof(const xmlChar *));
 7694:     if (tab == NULL) {
 7695: 	return(NULL);
 7696:     }
 7697: 
 7698:     /*
 7699:      * collect all the strings accepted by the subexpression on input
 7700:      */
 7701:     len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7702:     while (len < 0) {
 7703:         const xmlChar **temp;
 7704: 	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
 7705: 	                                     sizeof(const xmlChar *));
 7706: 	if (temp == NULL) {
 7707: 	    xmlFree((xmlChar **) tab);
 7708: 	    return(NULL);
 7709: 	}
 7710: 	tab = temp;
 7711: 	ctxt->tabSize *= 2;
 7712: 	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7713:     }
 7714:     for (i = 0;i < len;i++) {
 7715:         tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
 7716: 	if ((tmp == NULL) || (tmp == forbiddenExp)) {
 7717: 	    xmlExpFree(ctxt, ret);
 7718: 	    xmlFree((xmlChar **) tab);
 7719: 	    return(tmp);
 7720: 	}
 7721: 	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
 7722: 	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
 7723: 	    xmlExpFree(ctxt, tmp);
 7724: 	    xmlExpFree(ctxt, ret);
 7725: 	    xmlFree((xmlChar **) tab);
 7726: 	    return(tmp);
 7727: 	}
 7728: 	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7729: 	xmlExpFree(ctxt, tmp);
 7730: 	xmlExpFree(ctxt, tmp2);
 7731: 
 7732: 	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
 7733: 	    xmlExpFree(ctxt, ret);
 7734: 	    xmlFree((xmlChar **) tab);
 7735: 	    return(tmp3);
 7736: 	}
 7737: 
 7738: 	if (ret == NULL)
 7739: 	    ret = tmp3;
 7740: 	else {
 7741: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
 7742: 	    if (ret == NULL) {
 7743: 		xmlFree((xmlChar **) tab);
 7744: 	        return(NULL);
 7745: 	    }
 7746: 	}
 7747:     }
 7748:     xmlFree((xmlChar **) tab);
 7749:     return(ret);
 7750: }
 7751: 
 7752: /**
 7753:  * xmlExpExpDerive:
 7754:  * @ctxt: the expressions context
 7755:  * @exp: the englobing expression
 7756:  * @sub: the subexpression
 7757:  *
 7758:  * Evaluates the expression resulting from @exp consuming a sub expression @sub
 7759:  * Based on algebraic derivation and sometimes direct Brzozowski derivation
 7760:  * it usually tatkes less than linear time and can handle expressions generating
 7761:  * infinite languages.
 7762:  *
 7763:  * Returns the resulting expression or NULL in case of internal error, the
 7764:  *         result must be freed
 7765:  */
 7766: xmlExpNodePtr
 7767: xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7768:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7769:         return(NULL);
 7770: 
 7771:     /*
 7772:      * O(1) speedups
 7773:      */
 7774:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7775: #ifdef DEBUG_DERIV
 7776: 	printf("Sub nillable and not exp : can't subsume\n");
 7777: #endif
 7778:         return(forbiddenExp);
 7779:     }
 7780:     if (xmlExpCheckCard(exp, sub) == 0) {
 7781: #ifdef DEBUG_DERIV
 7782: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7783: #endif
 7784:         return(forbiddenExp);
 7785:     }
 7786:     return(xmlExpExpDeriveInt(ctxt, exp, sub));
 7787: }
 7788: 
 7789: /**
 7790:  * xmlExpSubsume:
 7791:  * @ctxt: the expressions context
 7792:  * @exp: the englobing expression
 7793:  * @sub: the subexpression
 7794:  *
 7795:  * Check whether @exp accepts all the languages accexpted by @sub
 7796:  * the input being a subexpression.
 7797:  *
 7798:  * Returns 1 if true 0 if false and -1 in case of failure.
 7799:  */
 7800: int
 7801: xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7802:     xmlExpNodePtr tmp;
 7803: 
 7804:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7805:         return(-1);
 7806: 
 7807:     /*
 7808:      * TODO: speedup by checking the language of sub is a subset of the
 7809:      *       language of exp
 7810:      */
 7811:     /*
 7812:      * O(1) speedups
 7813:      */
 7814:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7815: #ifdef DEBUG_DERIV
 7816: 	printf("Sub nillable and not exp : can't subsume\n");
 7817: #endif
 7818:         return(0);
 7819:     }
 7820:     if (xmlExpCheckCard(exp, sub) == 0) {
 7821: #ifdef DEBUG_DERIV
 7822: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7823: #endif
 7824:         return(0);
 7825:     }
 7826:     tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
 7827: #ifdef DEBUG_DERIV
 7828:     printf("Result derivation :\n");
 7829:     PRINT_EXP(tmp);
 7830: #endif
 7831:     if (tmp == NULL)
 7832:         return(-1);
 7833:     if (tmp == forbiddenExp)
 7834: 	return(0);
 7835:     if (tmp == emptyExp)
 7836: 	return(1);
 7837:     if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
 7838:         xmlExpFree(ctxt, tmp);
 7839:         return(1);
 7840:     }
 7841:     xmlExpFree(ctxt, tmp);
 7842:     return(0);
 7843: }
 7844: 
 7845: /************************************************************************
 7846:  *									*
 7847:  *			Parsing expression				*
 7848:  *									*
 7849:  ************************************************************************/
 7850: 
 7851: static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
 7852: 
 7853: #undef CUR
 7854: #define CUR (*ctxt->cur)
 7855: #undef NEXT
 7856: #define NEXT ctxt->cur++;
 7857: #undef IS_BLANK
 7858: #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
 7859: #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
 7860: 
 7861: static int
 7862: xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
 7863:     int ret = 0;
 7864: 
 7865:     SKIP_BLANKS
 7866:     if (CUR == '*') {
 7867: 	NEXT
 7868: 	return(-1);
 7869:     }
 7870:     if ((CUR < '0') || (CUR > '9'))
 7871:         return(-1);
 7872:     while ((CUR >= '0') && (CUR <= '9')) {
 7873:         ret = ret * 10 + (CUR - '0');
 7874: 	NEXT
 7875:     }
 7876:     return(ret);
 7877: }
 7878: 
 7879: static xmlExpNodePtr
 7880: xmlExpParseOr(xmlExpCtxtPtr ctxt) {
 7881:     const char *base;
 7882:     xmlExpNodePtr ret;
 7883:     const xmlChar *val;
 7884: 
 7885:     SKIP_BLANKS
 7886:     base = ctxt->cur;
 7887:     if (*ctxt->cur == '(') {
 7888:         NEXT
 7889: 	ret = xmlExpParseExpr(ctxt);
 7890: 	SKIP_BLANKS
 7891: 	if (*ctxt->cur != ')') {
 7892: 	    fprintf(stderr, "unbalanced '(' : %s\n", base);
 7893: 	    xmlExpFree(ctxt, ret);
 7894: 	    return(NULL);
 7895: 	}
 7896: 	NEXT;
 7897: 	SKIP_BLANKS
 7898: 	goto parse_quantifier;
 7899:     }
 7900:     while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
 7901:            (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
 7902: 	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
 7903: 	NEXT;
 7904:     val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
 7905:     if (val == NULL)
 7906:         return(NULL);
 7907:     ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
 7908:     if (ret == NULL)
 7909:         return(NULL);
 7910:     SKIP_BLANKS
 7911: parse_quantifier:
 7912:     if (CUR == '{') {
 7913:         int min, max;
 7914: 
 7915:         NEXT
 7916: 	min = xmlExpParseNumber(ctxt);
 7917: 	if (min < 0) {
 7918: 	    xmlExpFree(ctxt, ret);
 7919: 	    return(NULL);
 7920: 	}
 7921: 	SKIP_BLANKS
 7922: 	if (CUR == ',') {
 7923: 	    NEXT
 7924: 	    max = xmlExpParseNumber(ctxt);
 7925: 	    SKIP_BLANKS
 7926: 	} else
 7927: 	    max = min;
 7928: 	if (CUR != '}') {
 7929: 	    xmlExpFree(ctxt, ret);
 7930: 	    return(NULL);
 7931: 	}
 7932:         NEXT
 7933: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7934: 	                         min, max);
 7935: 	SKIP_BLANKS
 7936:     } else if (CUR == '?') {
 7937:         NEXT
 7938: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7939: 	                         0, 1);
 7940: 	SKIP_BLANKS
 7941:     } else if (CUR == '+') {
 7942:         NEXT
 7943: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7944: 	                         1, -1);
 7945: 	SKIP_BLANKS
 7946:     } else if (CUR == '*') {
 7947:         NEXT
 7948: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7949: 	                         0, -1);
 7950: 	SKIP_BLANKS
 7951:     }
 7952:     return(ret);
 7953: }
 7954: 
 7955: 
 7956: static xmlExpNodePtr
 7957: xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
 7958:     xmlExpNodePtr ret, right;
 7959: 
 7960:     ret = xmlExpParseOr(ctxt);
 7961:     SKIP_BLANKS
 7962:     while (CUR == '|') {
 7963:         NEXT
 7964: 	right = xmlExpParseOr(ctxt);
 7965: 	if (right == NULL) {
 7966: 	    xmlExpFree(ctxt, ret);
 7967: 	    return(NULL);
 7968: 	}
 7969: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
 7970: 	if (ret == NULL)
 7971: 	    return(NULL);
 7972:     }
 7973:     return(ret);
 7974: }
 7975: 
 7976: static xmlExpNodePtr
 7977: xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
 7978:     xmlExpNodePtr ret, right;
 7979: 
 7980:     ret = xmlExpParseSeq(ctxt);
 7981:     SKIP_BLANKS
 7982:     while (CUR == ',') {
 7983:         NEXT
 7984: 	right = xmlExpParseSeq(ctxt);
 7985: 	if (right == NULL) {
 7986: 	    xmlExpFree(ctxt, ret);
 7987: 	    return(NULL);
 7988: 	}
 7989: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
 7990: 	if (ret == NULL)
 7991: 	    return(NULL);
 7992:     }
 7993:     return(ret);
 7994: }
 7995: 
 7996: /**
 7997:  * xmlExpParse:
 7998:  * @ctxt: the expressions context
 7999:  * @expr: the 0 terminated string
 8000:  *
 8001:  * Minimal parser for regexps, it understand the following constructs
 8002:  *  - string terminals
 8003:  *  - choice operator |
 8004:  *  - sequence operator ,
 8005:  *  - subexpressions (...)
 8006:  *  - usual cardinality operators + * and ?
 8007:  *  - finite sequences  { min, max }
 8008:  *  - infinite sequences { min, * }
 8009:  * There is minimal checkings made especially no checking on strings values
 8010:  *
 8011:  * Returns a new expression or NULL in case of failure
 8012:  */
 8013: xmlExpNodePtr
 8014: xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
 8015:     xmlExpNodePtr ret;
 8016: 
 8017:     ctxt->expr = expr;
 8018:     ctxt->cur = expr;
 8019: 
 8020:     ret = xmlExpParseExpr(ctxt);
 8021:     SKIP_BLANKS
 8022:     if (*ctxt->cur != 0) {
 8023:         xmlExpFree(ctxt, ret);
 8024:         return(NULL);
 8025:     }
 8026:     return(ret);
 8027: }
 8028: 
 8029: static void
 8030: xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
 8031:     xmlExpNodePtr c;
 8032: 
 8033:     if (expr == NULL) return;
 8034:     if (glob) xmlBufferWriteChar(buf, "(");
 8035:     switch (expr->type) {
 8036:         case XML_EXP_EMPTY:
 8037: 	    xmlBufferWriteChar(buf, "empty");
 8038: 	    break;
 8039:         case XML_EXP_FORBID:
 8040: 	    xmlBufferWriteChar(buf, "forbidden");
 8041: 	    break;
 8042:         case XML_EXP_ATOM:
 8043: 	    xmlBufferWriteCHAR(buf, expr->exp_str);
 8044: 	    break;
 8045:         case XML_EXP_SEQ:
 8046: 	    c = expr->exp_left;
 8047: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8048: 	        xmlExpDumpInt(buf, c, 1);
 8049: 	    else
 8050: 	        xmlExpDumpInt(buf, c, 0);
 8051: 	    xmlBufferWriteChar(buf, " , ");
 8052: 	    c = expr->exp_right;
 8053: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8054: 	        xmlExpDumpInt(buf, c, 1);
 8055: 	    else
 8056: 	        xmlExpDumpInt(buf, c, 0);
 8057:             break;
 8058:         case XML_EXP_OR:
 8059: 	    c = expr->exp_left;
 8060: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8061: 	        xmlExpDumpInt(buf, c, 1);
 8062: 	    else
 8063: 	        xmlExpDumpInt(buf, c, 0);
 8064: 	    xmlBufferWriteChar(buf, " | ");
 8065: 	    c = expr->exp_right;
 8066: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8067: 	        xmlExpDumpInt(buf, c, 1);
 8068: 	    else
 8069: 	        xmlExpDumpInt(buf, c, 0);
 8070:             break;
 8071:         case XML_EXP_COUNT: {
 8072: 	    char rep[40];
 8073: 
 8074: 	    c = expr->exp_left;
 8075: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8076: 	        xmlExpDumpInt(buf, c, 1);
 8077: 	    else
 8078: 	        xmlExpDumpInt(buf, c, 0);
 8079: 	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
 8080: 		rep[0] = '?';
 8081: 		rep[1] = 0;
 8082: 	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
 8083: 		rep[0] = '*';
 8084: 		rep[1] = 0;
 8085: 	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
 8086: 		rep[0] = '+';
 8087: 		rep[1] = 0;
 8088: 	    } else if (expr->exp_max == expr->exp_min) {
 8089: 	        snprintf(rep, 39, "{%d}", expr->exp_min);
 8090: 	    } else if (expr->exp_max < 0) {
 8091: 	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
 8092: 	    } else {
 8093: 	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
 8094: 	    }
 8095: 	    rep[39] = 0;
 8096: 	    xmlBufferWriteChar(buf, rep);
 8097: 	    break;
 8098: 	}
 8099: 	default:
 8100: 	    fprintf(stderr, "Error in tree\n");
 8101:     }
 8102:     if (glob)
 8103:         xmlBufferWriteChar(buf, ")");
 8104: }
 8105: /**
 8106:  * xmlExpDump:
 8107:  * @buf:  a buffer to receive the output
 8108:  * @expr:  the compiled expression
 8109:  *
 8110:  * Serialize the expression as compiled to the buffer
 8111:  */
 8112: void
 8113: xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
 8114:     if ((buf == NULL) || (expr == NULL))
 8115:         return;
 8116:     xmlExpDumpInt(buf, expr, 0);
 8117: }
 8118: 
 8119: /**
 8120:  * xmlExpMaxToken:
 8121:  * @expr: a compiled expression
 8122:  *
 8123:  * Indicate the maximum number of input a expression can accept
 8124:  *
 8125:  * Returns the maximum length or -1 in case of error
 8126:  */
 8127: int
 8128: xmlExpMaxToken(xmlExpNodePtr expr) {
 8129:     if (expr == NULL)
 8130:         return(-1);
 8131:     return(expr->c_max);
 8132: }
 8133: 
 8134: /**
 8135:  * xmlExpCtxtNbNodes:
 8136:  * @ctxt: an expression context
 8137:  *
 8138:  * Debugging facility provides the number of allocated nodes at a that point
 8139:  *
 8140:  * Returns the number of nodes in use or -1 in case of error
 8141:  */
 8142: int
 8143: xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
 8144:     if (ctxt == NULL)
 8145:         return(-1);
 8146:     return(ctxt->nb_nodes);
 8147: }
 8148: 
 8149: /**
 8150:  * xmlExpCtxtNbCons:
 8151:  * @ctxt: an expression context
 8152:  *
 8153:  * Debugging facility provides the number of allocated nodes over lifetime
 8154:  *
 8155:  * Returns the number of nodes ever allocated or -1 in case of error
 8156:  */
 8157: int
 8158: xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
 8159:     if (ctxt == NULL)
 8160:         return(-1);
 8161:     return(ctxt->nb_cons);
 8162: }
 8163: 
 8164: #endif /* LIBXML_EXPR_ENABLED */
 8165: #define bottom_xmlregexp
 8166: #include "elfgcchack.h"
 8167: #endif /* LIBXML_REGEXP_ENABLED */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>