File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / xmlregexp.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:37:58 2012 UTC (12 years, 4 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_7_8, HEAD
libxml2

    1: /*
    2:  * regexp.c: generic and extensible Regular Expression engine
    3:  *
    4:  * Basically designed with the purpose of compiling regexps for 
    5:  * the variety of validation/schemas mechanisms now available in
    6:  * XML related specifications these include:
    7:  *    - XML-1.0 DTD validation
    8:  *    - XML Schemas structure part 1
    9:  *    - XML Schemas Datatypes part 2 especially Appendix F
   10:  *    - RELAX-NG/TREX i.e. the counter proposal
   11:  *
   12:  * See Copyright for the status of this software.
   13:  *
   14:  * Daniel Veillard <veillard@redhat.com>
   15:  */
   16: 
   17: #define IN_LIBXML
   18: #include "libxml.h"
   19: 
   20: #ifdef LIBXML_REGEXP_ENABLED
   21: 
   22: /* #define DEBUG_ERR */
   23: 
   24: #include <stdio.h>
   25: #include <string.h>
   26: #ifdef HAVE_LIMITS_H
   27: #include <limits.h>
   28: #endif
   29: 
   30: #include <libxml/tree.h>
   31: #include <libxml/parserInternals.h>
   32: #include <libxml/xmlregexp.h>
   33: #include <libxml/xmlautomata.h>
   34: #include <libxml/xmlunicode.h>
   35: 
   36: #ifndef INT_MAX
   37: #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
   38: #endif
   39: 
   40: /* #define DEBUG_REGEXP_GRAPH */
   41: /* #define DEBUG_REGEXP_EXEC */
   42: /* #define DEBUG_PUSH */
   43: /* #define DEBUG_COMPACTION */
   44: 
   45: #define MAX_PUSH 10000000 /* NOTE(review): not used in this part of the file; presumably bounds xmlRegExecCtxt.nbPush - confirm */
   46: 
   47: #define ERROR(str)							\
   48:     ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
   49:     xmlRegexpErrCompile(ctxt, str); /* expands to two statements and needs a local `ctxt`; brace it when used as an if/else body */
   50: #define NEXT ctxt->cur++ /* move the parse cursor one byte forward */
   51: #define CUR (*(ctxt->cur)) /* byte under the parse cursor */
   52: #define NXT(index) (ctxt->cur[index]) /* byte at offset `index` from the parse cursor */
   53: 
   54: #define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l) /* decode the character at s; l is filled in by xmlStringCurrentChar */
   55: #define NEXTL(l) ctxt->cur += l; /* move the parse cursor by l; the trailing ';' is part of the expansion */
   56: #define XML_REG_STRING_SEPARATOR '|' /* NOTE(review): not used in this part of the file */
   57: /*
   58:  * Need PREV to check on a '-' within a Character Group. May only be used
   59:  * when it's guaranteed that cur is not at the beginning of ctxt->string!
        * (It reads ctxt->cur[-1], i.e. the byte just before the cursor.)
   60:  */
   61: #define PREV (ctxt->cur[-1])
   62: 
   63: /**
   64:  * TODO:
   65:  *
   66:  * macro to flag unimplemented blocks: reports the source file and line
        * of the expansion site through xmlGenericError. The expansion ends
        * with its own ';'.
   67:  */
   68: #define TODO 								\
   69:     xmlGenericError(xmlGenericErrorContext,				\
   70: 	    "Unimplemented block at %s:%d\n",				\
   71:             __FILE__, __LINE__);
   72: 
   73: /************************************************************************
   74:  * 									*
   75:  * 			Datatypes and structures			*
   76:  * 									*
   77:  ************************************************************************/
   78: 
   79: /*
   80:  * Note: the order of the enums below is significant, do not shuffle
        *
        * xmlRegAtomType tags both atoms (xmlRegAtom.type) and ranges
        * (xmlRegRange.type). Values 1..16 are the basic kinds and escapes,
        * values from 100 up are the named character classes.
        * NOTE(review): the code depending on the ordering is not in this
        * part of the file.
   81:  */
   82: typedef enum {
   83:     XML_REGEXP_EPSILON = 1,
   84:     XML_REGEXP_CHARVAL,
   85:     XML_REGEXP_RANGES,
   86:     XML_REGEXP_SUBREG,  /* used for () sub regexps */
   87:     XML_REGEXP_STRING, /* valuep/valuep2 are heap values released by xmlRegFreeAtom */
   88:     XML_REGEXP_ANYCHAR, /* . */
   89:     XML_REGEXP_ANYSPACE, /* \s */
   90:     XML_REGEXP_NOTSPACE, /* \S */
   91:     XML_REGEXP_INITNAME, /* \l */
   92:     XML_REGEXP_NOTINITNAME, /* \L */
   93:     XML_REGEXP_NAMECHAR, /* \c */
   94:     XML_REGEXP_NOTNAMECHAR, /* \C */
   95:     XML_REGEXP_DECIMAL, /* \d */
   96:     XML_REGEXP_NOTDECIMAL, /* \D */
   97:     XML_REGEXP_REALCHAR, /* \w */
   98:     XML_REGEXP_NOTREALCHAR, /* \W */
   99:     XML_REGEXP_LETTER = 100, /* numbering restarts at 100 for the class names below */
  100:     XML_REGEXP_LETTER_UPPERCASE,
  101:     XML_REGEXP_LETTER_LOWERCASE,
  102:     XML_REGEXP_LETTER_TITLECASE,
  103:     XML_REGEXP_LETTER_MODIFIER,
  104:     XML_REGEXP_LETTER_OTHERS,
  105:     XML_REGEXP_MARK,
  106:     XML_REGEXP_MARK_NONSPACING,
  107:     XML_REGEXP_MARK_SPACECOMBINING,
  108:     XML_REGEXP_MARK_ENCLOSING,
  109:     XML_REGEXP_NUMBER,
  110:     XML_REGEXP_NUMBER_DECIMAL,
  111:     XML_REGEXP_NUMBER_LETTER,
  112:     XML_REGEXP_NUMBER_OTHERS,
  113:     XML_REGEXP_PUNCT,
  114:     XML_REGEXP_PUNCT_CONNECTOR,
  115:     XML_REGEXP_PUNCT_DASH,
  116:     XML_REGEXP_PUNCT_OPEN,
  117:     XML_REGEXP_PUNCT_CLOSE,
  118:     XML_REGEXP_PUNCT_INITQUOTE,
  119:     XML_REGEXP_PUNCT_FINQUOTE,
  120:     XML_REGEXP_PUNCT_OTHERS,
  121:     XML_REGEXP_SEPAR,
  122:     XML_REGEXP_SEPAR_SPACE,
  123:     XML_REGEXP_SEPAR_LINE,
  124:     XML_REGEXP_SEPAR_PARA,
  125:     XML_REGEXP_SYMBOL,
  126:     XML_REGEXP_SYMBOL_MATH,
  127:     XML_REGEXP_SYMBOL_CURRENCY,
  128:     XML_REGEXP_SYMBOL_MODIFIER,
  129:     XML_REGEXP_SYMBOL_OTHERS,
  130:     XML_REGEXP_OTHER,
  131:     XML_REGEXP_OTHER_CONTROL,
  132:     XML_REGEXP_OTHER_FORMAT,
  133:     XML_REGEXP_OTHER_PRIVATE,
  134:     XML_REGEXP_OTHER_NA,
  135:     XML_REGEXP_BLOCK_NAME /* valuep is a heap value released by xmlRegFreeAtom */
  136: } xmlRegAtomType;
  137: 
  138: typedef enum { /* quantifier attached to an atom (xmlRegAtom.quant) */
  139:     XML_REGEXP_QUANT_EPSILON = 1,
  140:     XML_REGEXP_QUANT_ONCE, /* default given to new atoms by xmlRegNewAtom; the only quantifier accepted by the compact form */
  141:     XML_REGEXP_QUANT_OPT,
  142:     XML_REGEXP_QUANT_MULT,
  143:     XML_REGEXP_QUANT_PLUS,
  144:     XML_REGEXP_QUANT_ONCEONLY,
  145:     XML_REGEXP_QUANT_ALL,
  146:     XML_REGEXP_QUANT_RANGE /* NOTE(review): presumably uses xmlRegAtom.min/max - confirm */
  147: } xmlRegQuantType;
  148: 
  149: typedef enum { /* role of an automata state; stored in column 0 of each row of the compact table */
  150:     XML_REGEXP_START_STATE = 1,
  151:     XML_REGEXP_FINAL_STATE,
  152:     XML_REGEXP_TRANS_STATE, /* type given to every state created by xmlRegNewState */
  153:     XML_REGEXP_SINK_STATE,
  154:     XML_REGEXP_UNREACH_STATE
  155: } xmlRegStateType;
  156: 
  157: typedef enum { /* marker values for the `mark` and `reached` fields of a state */
  158:     XML_REGEXP_MARK_NORMAL = 0, /* value set on new states by xmlRegNewState */
  159:     XML_REGEXP_MARK_START,
  160:     XML_REGEXP_MARK_VISITED
  161: } xmlRegMarkedType;
  162: 
  163: typedef struct _xmlRegRange xmlRegRange; /* one entry of an atom's `ranges` array */
  164: typedef xmlRegRange *xmlRegRangePtr;
  165: 
  166: struct _xmlRegRange {
  167:     int neg;		/* 0 normal, 1 not, 2 exclude */
  168:     xmlRegAtomType type; /* kind of range */
  169:     int start; /* the start codepoint */
  170:     int end; /* the end codepoint */
  171:     xmlChar *blockName; /* owned string or NULL; released by xmlRegFreeRange, duplicated by xmlRegCopyRange */
  172: };
  173: 
  174: typedef struct _xmlRegAtom xmlRegAtom; /* label carried by a transition */
  175: typedef xmlRegAtom *xmlRegAtomPtr;
  176: 
  177: typedef struct _xmlAutomataState xmlRegState;
  178: typedef xmlRegState *xmlRegStatePtr;
  179: 
  180: struct _xmlRegAtom {
  181:     int no; /* used by xmlRegEpxFromParse as the index of this atom in per-atom tables */
  182:     xmlRegAtomType type;
  183:     xmlRegQuantType quant; /* XML_REGEXP_QUANT_ONCE for atoms made by xmlRegNewAtom */
  184:     int min;
  185:     int max;
  186: 
  187:     void *valuep; /* freed by xmlRegFreeAtom for STRING and BLOCK_NAME atoms; for STRING atoms it is the xmlChar string compared during compaction */
  188:     void *valuep2; /* freed by xmlRegFreeAtom for STRING atoms */
  189:     int neg;
  190:     int codepoint;
  191:     xmlRegStatePtr start;
  192:     xmlRegStatePtr start0;
  193:     xmlRegStatePtr stop;
  194:     int maxRanges; /* NOTE(review): presumably the allocated capacity of `ranges` - confirm */
  195:     int nbRanges; /* number of entries of `ranges` released by xmlRegFreeAtom */
  196:     xmlRegRangePtr *ranges; /* array of owned ranges, may be NULL */
  197:     void *data; /* opaque pointer; copied into xmlRegexp.transdata by the compact form, never freed here */
  198: };
  199: 
  200: typedef struct _xmlRegCounter xmlRegCounter; /* element type of the `counters` arrays of the parser context and of xmlRegexp */
  201: typedef xmlRegCounter *xmlRegCounterPtr;
  202: 
  203: struct _xmlRegCounter {
  204:     int min; /* NOTE(review): presumably the lower bound of the counted repetition - confirm */
  205:     int max; /* NOTE(review): presumably the upper bound of the counted repetition - confirm */
  206: };
  207: 
  208: typedef struct _xmlRegTrans xmlRegTrans; /* one outgoing transition of a state */
  209: typedef xmlRegTrans *xmlRegTransPtr;
  210: 
  211: struct _xmlRegTrans {
  212:     xmlRegAtomPtr atom; /* label of the transition; transitions with a NULL atom are skipped by the compaction */
  213:     int to; /* index of the target state in the states array; -1 entries are skipped by the compaction */
  214:     int counter;
  215:     int count;
  216:     int nd;
  217: };
  218: 
  219: struct _xmlAutomataState { /* one automata state (typedef'ed as xmlRegState) */
  220:     xmlRegStateType type; /* XML_REGEXP_TRANS_STATE on creation */
  221:     xmlRegMarkedType mark; /* XML_REGEXP_MARK_NORMAL on creation */
  222:     xmlRegMarkedType reached;
  223:     int no;
  224:     int maxTrans; /* NOTE(review): presumably the allocated capacity of `trans` - confirm */
  225:     int nbTrans; /* number of entries of `trans` walked by the compaction */
  226:     xmlRegTrans *trans; /* owned array of outgoing transitions, released by xmlRegFreeState */
  227:     /*  knowing states pointing to us can speed things up */
  228:     int maxTransTo;
  229:     int nbTransTo;
  230:     int *transTo; /* owned array, released by xmlRegFreeState */
  231: };
  232: 
  233: typedef struct _xmlAutomata xmlRegParserCtxt; /* the regexp parser context and the automata share one structure */
  234: typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
  235: 
  236: #define AM_AUTOMATA_RNG 1 /* NOTE(review): presumably a bit for the `flags` field - confirm */
  237: 
  238: struct _xmlAutomata {
  239:     xmlChar *string; /* owned copy of the expression being parsed, may be NULL */
  240:     xmlChar *cur; /* parse cursor into `string` */
  241: 
  242:     int error; /* set to XML_ERR_NO_MEMORY or XML_REGEXP_COMPILE_ERROR by the error handlers */
  243:     int neg;
  244: 
  245:     xmlRegStatePtr start;
  246:     xmlRegStatePtr end;
  247:     xmlRegStatePtr state;
  248: 
  249:     xmlRegAtomPtr atom;
  250: 
  251:     int maxAtoms;
  252:     int nbAtoms; /* number of entries in `atoms` */
  253:     xmlRegAtomPtr *atoms; /* owned; handed over to the xmlRegexp by xmlRegEpxFromParse */
  254: 
  255:     int maxStates;
  256:     int nbStates; /* number of entries in `states`; entries may be NULL */
  257:     xmlRegStatePtr *states; /* owned; handed over to the xmlRegexp by xmlRegEpxFromParse */
  258: 
  259:     int maxCounters;
  260:     int nbCounters; /* a non-zero count disables the compact form */
  261:     xmlRegCounter *counters; /* owned; handed over to the xmlRegexp by xmlRegEpxFromParse */
  262: 
  263:     int determinist; /* starts at -1; xmlRegEpxFromParse calls xmlRegexpIsDeterminist while it is still -1 */
  264:     int negs; /* a non-zero value disables the compact form */
  265:     int flags; /* copied verbatim into xmlRegexp.flags */
  266: };
  267: 
  268: struct _xmlRegexp { /* compiled regexp, filled in by xmlRegEpxFromParse from a parser context */
  269:     xmlChar *string; /* taken over from the parser context */
  270:     int nbStates; /* reset to 0 once the compact form is in use */
  271:     xmlRegStatePtr *states; /* taken over from the parser context; freed and NULL once compacted */
  272:     int nbAtoms; /* reset to 0 once the compact form is in use */
  273:     xmlRegAtomPtr *atoms; /* taken over from the parser context; freed and NULL once compacted */
  274:     int nbCounters;
  275:     xmlRegCounter *counters; /* taken over from the parser context */
  276:     int determinist;
  277:     int flags;
  278:     /*
  279:      * That's the compact form for determinists automatas
        *
        * `compact` is allocated as (nbstates + 1) rows of (nbstrings + 1)
        * ints. In each row cell 0 is the xmlRegStateType of the state and
        * cell s + 1 is the target row + 1 for string number s, 0 meaning
        * that there is no transition.
  280:      */
  281:     int nbstates; /* number of non-NULL states counted at compaction time */
  282:     int *compact; /* the transition table described above, NULL when not compacted */
  283:     void **transdata; /* optional nbstates x nbstrings table of atom `data` pointers, NULL if no atom had data */
  284:     int nbstrings; /* number of distinct atom strings */
  285:     xmlChar **stringMap; /* the nbstrings distinct strings, each an owned duplicate */
  286: };
  287: 
  288: typedef struct _xmlRegExecRollback xmlRegExecRollback; /* element type of xmlRegExecCtxt.rollbacks */
  289: typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
  290: 
  291: struct _xmlRegExecRollback {
  292:     xmlRegStatePtr state;/* the current state */
  293:     int index;		/* the index in the input stack */
  294:     int nextbranch;	/* the next transition to explore in that state */
  295:     int *counts;	/* save the automata state if it has some (NOTE(review): ownership is not visible in this part of the file) */
  296: };
  297: 
  298: typedef struct _xmlRegInputToken xmlRegInputToken; /* element type of xmlRegExecCtxt.inputStack */
  299: typedef xmlRegInputToken *xmlRegInputTokenPtr;
  300: 
  301: struct _xmlRegInputToken {
  302:     xmlChar *value; /* NOTE(review): presumably the pushed token string - confirm against the push code */
  303:     void *data; /* NOTE(review): presumably the user data pushed with the token - confirm */
  304: };
  305: 
  306: struct _xmlRegExecCtxt { /* execution context; the code using it is not in this part of the file */
  307:     int status;		/* execution status != 0 indicate an error */
  308:     int determinist;	/* did we find an indeterministic behaviour */
  309:     xmlRegexpPtr comp;	/* the compiled regexp */
  310:     xmlRegExecCallbacks callback;
  311:     void *data; /* NOTE(review): presumably the user data given back to `callback` - confirm */
  312: 
  313:     xmlRegStatePtr state;/* the current state */
  314:     int transno;	/* the current transition on that state */
  315:     int transcount;	/* the number of chars in char counted transitions */
  316: 
  317:     /*
  318:      * A stack of rollback states
  319:      */
  320:     int maxRollbacks;
  321:     int nbRollbacks;
  322:     xmlRegExecRollback *rollbacks;
  323: 
  324:     /*
  325:      * The state of the automata if any
  326:      */
  327:     int *counts;
  328: 
  329:     /*
  330:      * The input stack
  331:      */
  332:     int inputStackMax;
  333:     int inputStackNr;
  334:     int index;
  335:     int *charStack;
  336:     const xmlChar *inputString; /* when operating on characters */
  337:     xmlRegInputTokenPtr inputStack;/* when operating on strings */
  338: 
  339:     /*
  340:      * error handling
  341:      */
  342:     int errStateNo;		/* the error state number */
  343:     xmlRegStatePtr errState;    /* the error state */
  344:     xmlChar *errString;		/* the string raising the error */
  345:     int *errCounts;		/* counters at the error state */
  346:     int nbPush; /* NOTE(review): presumably the number of pushes done, checked against MAX_PUSH - confirm */
  347: };
  348: 
  349: #define REGEXP_ALL_COUNTER	0x123456 /* NOTE(review): magic value, not used in this part of the file; presumably a special xmlRegTrans.counter marker - confirm */
  350: #define REGEXP_ALL_LAX_COUNTER	0x123457 /* NOTE(review): same remark as REGEXP_ALL_COUNTER */
  351: 
  352: static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top); /* forward declarations of helpers defined later in the file */
  353: static void xmlRegFreeState(xmlRegStatePtr state);
  354: static void xmlRegFreeAtom(xmlRegAtomPtr atom);
  355: static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
  356: static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
  357: static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
  358:                   int neg, int start, int end, const xmlChar *blockName);
  359: 
  360: void xmlAutomataSetFlags(xmlAutomataPtr am, int flags); /* non-static declaration; the definition is not in this part of the file */
  361: 
  362: /************************************************************************
  363:  *									*
  364:  * 		Regexp memory error handler				*
  365:  *									*
  366:  ************************************************************************/
  367: /**
  368:  * xmlRegexpErrMemory:
  369:  * @extra:  extra information
  370:  *
  371:  * Handle an out of memory condition
  372:  */
  373: static void
  374: xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
  375: {
  376:     const char *regexp = NULL;
  377:     if (ctxt != NULL) {
  378:         regexp = (const char *) ctxt->string;
  379: 	ctxt->error = XML_ERR_NO_MEMORY;
  380:     }
  381:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  382: 		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
  383: 		    regexp, NULL, 0, 0,
  384: 		    "Memory allocation failed : %s\n", extra);
  385: }
  386: 
  387: /**
  388:  * xmlRegexpErrCompile:
  389:  * @extra:  extra information
  390:  *
  391:  * Handle a compilation failure
  392:  */
  393: static void
  394: xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  395: {
  396:     const char *regexp = NULL;
  397:     int idx = 0;
  398: 
  399:     if (ctxt != NULL) {
  400:         regexp = (const char *) ctxt->string;
  401: 	idx = ctxt->cur - ctxt->string;
  402: 	ctxt->error = XML_REGEXP_COMPILE_ERROR;
  403:     }
  404:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  405: 		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
  406: 		    regexp, NULL, idx, 0,
  407: 		    "failed to compile: %s\n", extra);
  408: }
  409: 
  410: /************************************************************************
  411:  * 									*
  412:  * 			Allocation/Deallocation				*
  413:  * 									*
  414:  ************************************************************************/
  415: 
  416: static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
  417: /**
  418:  * xmlRegEpxFromParse:
  419:  * @ctxt:  the parser context used to build it
  420:  *
  421:  * Allocate a new regexp and fill it with the result from the parser
  422:  *
  423:  * Returns the new regexp or NULL in case of error
  424:  */
  425: static xmlRegexpPtr
  426: xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
  427:     xmlRegexpPtr ret;
  428: 
  429:     ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
  430:     if (ret == NULL) {
  431: 	xmlRegexpErrMemory(ctxt, "compiling regexp");
  432: 	return(NULL);
  433:     }
  434:     memset(ret, 0, sizeof(xmlRegexp));
  435:     ret->string = ctxt->string;
  436:     ret->nbStates = ctxt->nbStates;
  437:     ret->states = ctxt->states;
  438:     ret->nbAtoms = ctxt->nbAtoms;
  439:     ret->atoms = ctxt->atoms;
  440:     ret->nbCounters = ctxt->nbCounters;
  441:     ret->counters = ctxt->counters;
  442:     ret->determinist = ctxt->determinist;
  443:     ret->flags = ctxt->flags;
  444:     if (ret->determinist == -1) {
  445:         xmlRegexpIsDeterminist(ret);
  446:     }
  447: 
  448:     if ((ret->determinist != 0) &&
  449: 	(ret->nbCounters == 0) &&
  450: 	(ctxt->negs == 0) &&
  451: 	(ret->atoms != NULL) &&
  452: 	(ret->atoms[0] != NULL) &&
  453: 	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
  454: 	int i, j, nbstates = 0, nbatoms = 0;
  455: 	int *stateRemap;
  456: 	int *stringRemap;
  457: 	int *transitions;
  458: 	void **transdata;
  459: 	xmlChar **stringMap;
  460:         xmlChar *value;
  461: 
  462: 	/*
  463: 	 * Switch to a compact representation
  464: 	 * 1/ counting the effective number of states left
  465: 	 * 2/ counting the unique number of atoms, and check that
  466: 	 *    they are all of the string type
  467: 	 * 3/ build a table state x atom for the transitions
  468: 	 */
  469: 
  470: 	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
  471: 	if (stateRemap == NULL) {
  472: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  473: 	    xmlFree(ret);
  474: 	    return(NULL);
  475: 	}
  476: 	for (i = 0;i < ret->nbStates;i++) {
  477: 	    if (ret->states[i] != NULL) {
  478: 		stateRemap[i] = nbstates;
  479: 		nbstates++;
  480: 	    } else {
  481: 		stateRemap[i] = -1;
  482: 	    }
  483: 	}
  484: #ifdef DEBUG_COMPACTION
  485: 	printf("Final: %d states\n", nbstates);
  486: #endif
  487: 	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
  488: 	if (stringMap == NULL) {
  489: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  490: 	    xmlFree(stateRemap);
  491: 	    xmlFree(ret);
  492: 	    return(NULL);
  493: 	}
  494: 	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
  495: 	if (stringRemap == NULL) {
  496: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  497: 	    xmlFree(stringMap);
  498: 	    xmlFree(stateRemap);
  499: 	    xmlFree(ret);
  500: 	    return(NULL);
  501: 	}
  502: 	for (i = 0;i < ret->nbAtoms;i++) {
  503: 	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
  504: 		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
  505: 		value = ret->atoms[i]->valuep;
  506:                 for (j = 0;j < nbatoms;j++) {
  507: 		    if (xmlStrEqual(stringMap[j], value)) {
  508: 			stringRemap[i] = j;
  509: 			break;
  510: 		    }
  511: 		}
  512: 		if (j >= nbatoms) {
  513: 		    stringRemap[i] = nbatoms;
  514: 		    stringMap[nbatoms] = xmlStrdup(value);
  515: 		    if (stringMap[nbatoms] == NULL) {
  516: 			for (i = 0;i < nbatoms;i++)
  517: 			    xmlFree(stringMap[i]);
  518: 			xmlFree(stringRemap);
  519: 			xmlFree(stringMap);
  520: 			xmlFree(stateRemap);
  521: 			xmlFree(ret);
  522: 			return(NULL);
  523: 		    }
  524: 		    nbatoms++;
  525: 		}
  526: 	    } else {
  527: 		xmlFree(stateRemap);
  528: 		xmlFree(stringRemap);
  529: 		for (i = 0;i < nbatoms;i++)
  530: 		    xmlFree(stringMap[i]);
  531: 		xmlFree(stringMap);
  532: 		xmlFree(ret);
  533: 		return(NULL);
  534: 	    }
  535: 	}
  536: #ifdef DEBUG_COMPACTION
  537: 	printf("Final: %d atoms\n", nbatoms);
  538: #endif
  539: 	transitions = (int *) xmlMalloc((nbstates + 1) *
  540: 	                                (nbatoms + 1) * sizeof(int));
  541: 	if (transitions == NULL) {
  542: 	    xmlFree(stateRemap);
  543: 	    xmlFree(stringRemap);
  544: 	    xmlFree(stringMap);
  545: 	    xmlFree(ret);
  546: 	    return(NULL);
  547: 	}
  548: 	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
  549: 
  550: 	/*
  551: 	 * Allocate the transition table. The first entry for each
  552: 	 * state corresponds to the state type.
  553: 	 */
  554: 	transdata = NULL;
  555: 
  556: 	for (i = 0;i < ret->nbStates;i++) {
  557: 	    int stateno, atomno, targetno, prev;
  558: 	    xmlRegStatePtr state;
  559: 	    xmlRegTransPtr trans;
  560: 
  561: 	    stateno = stateRemap[i];
  562: 	    if (stateno == -1)
  563: 		continue;
  564: 	    state = ret->states[i];
  565: 
  566: 	    transitions[stateno * (nbatoms + 1)] = state->type;
  567: 
  568: 	    for (j = 0;j < state->nbTrans;j++) {
  569: 		trans = &(state->trans[j]);
  570: 		if ((trans->to == -1) || (trans->atom == NULL))
  571: 		    continue;
  572:                 atomno = stringRemap[trans->atom->no];
  573: 		if ((trans->atom->data != NULL) && (transdata == NULL)) {
  574: 		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
  575: 			                            sizeof(void *));
  576: 		    if (transdata != NULL)
  577: 			memset(transdata, 0,
  578: 			       nbstates * nbatoms * sizeof(void *));
  579: 		    else {
  580: 			xmlRegexpErrMemory(ctxt, "compiling regexp");
  581: 			break;
  582: 		    }
  583: 		}
  584: 		targetno = stateRemap[trans->to];
  585: 		/*
  586: 		 * if the same atom can generate transitions to 2 different
  587: 		 * states then it means the automata is not determinist and
  588: 		 * the compact form can't be used !
  589: 		 */
  590: 		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
  591: 		if (prev != 0) {
  592: 		    if (prev != targetno + 1) {
  593: 			ret->determinist = 0;
  594: #ifdef DEBUG_COMPACTION
  595: 			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
  596: 			       i, j, trans->atom->no, trans->to, atomno, targetno);
  597: 			printf("       previous to is %d\n", prev);
  598: #endif
  599: 			if (transdata != NULL)
  600: 			    xmlFree(transdata);
  601: 			xmlFree(transitions);
  602: 			xmlFree(stateRemap);
  603: 			xmlFree(stringRemap);
  604: 			for (i = 0;i < nbatoms;i++)
  605: 			    xmlFree(stringMap[i]);
  606: 			xmlFree(stringMap);
  607: 			goto not_determ;
  608: 		    }
  609: 		} else {
  610: #if 0
  611: 		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
  612: 			   i, j, trans->atom->no, trans->to, atomno, targetno);
  613: #endif
  614: 		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
  615: 			targetno + 1; /* to avoid 0 */
  616: 		    if (transdata != NULL)
  617: 			transdata[stateno * nbatoms + atomno] =
  618: 			    trans->atom->data;
  619: 		}
  620: 	    }
  621: 	}
  622: 	ret->determinist = 1;
  623: #ifdef DEBUG_COMPACTION
  624: 	/*
  625: 	 * Debug
  626: 	 */
  627: 	for (i = 0;i < nbstates;i++) {
  628: 	    for (j = 0;j < nbatoms + 1;j++) {
  629:                 printf("%02d ", transitions[i * (nbatoms + 1) + j]);
  630: 	    }
  631: 	    printf("\n");
  632: 	}
  633: 	printf("\n");
  634: #endif
  635: 	/*
  636: 	 * Cleanup of the old data
  637: 	 */
  638: 	if (ret->states != NULL) {
  639: 	    for (i = 0;i < ret->nbStates;i++)
  640: 		xmlRegFreeState(ret->states[i]);
  641: 	    xmlFree(ret->states);
  642: 	}
  643: 	ret->states = NULL;
  644: 	ret->nbStates = 0;
  645: 	if (ret->atoms != NULL) {
  646: 	    for (i = 0;i < ret->nbAtoms;i++)
  647: 		xmlRegFreeAtom(ret->atoms[i]);
  648: 	    xmlFree(ret->atoms);
  649: 	}
  650: 	ret->atoms = NULL;
  651: 	ret->nbAtoms = 0;
  652: 
  653: 	ret->compact = transitions;
  654: 	ret->transdata = transdata;
  655: 	ret->stringMap = stringMap;
  656: 	ret->nbstrings = nbatoms;
  657: 	ret->nbstates = nbstates;
  658: 	xmlFree(stateRemap);
  659: 	xmlFree(stringRemap);
  660:     }
  661: not_determ:
  662:     ctxt->string = NULL;
  663:     ctxt->nbStates = 0;
  664:     ctxt->states = NULL;
  665:     ctxt->nbAtoms = 0;
  666:     ctxt->atoms = NULL;
  667:     ctxt->nbCounters = 0;
  668:     ctxt->counters = NULL;
  669:     return(ret);
  670: }
  671: 
  672: /**
  673:  * xmlRegNewParserCtxt:
  674:  * @string:  the string to parse
  675:  *
  676:  * Allocate a new regexp parser context
  677:  *
  678:  * Returns the new context or NULL in case of error
  679:  */
  680: static xmlRegParserCtxtPtr
  681: xmlRegNewParserCtxt(const xmlChar *string) {
  682:     xmlRegParserCtxtPtr ret;
  683: 
  684:     ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
  685:     if (ret == NULL)
  686: 	return(NULL);
  687:     memset(ret, 0, sizeof(xmlRegParserCtxt));
  688:     if (string != NULL)
  689: 	ret->string = xmlStrdup(string);
  690:     ret->cur = ret->string;
  691:     ret->neg = 0;
  692:     ret->negs = 0;
  693:     ret->error = 0;
  694:     ret->determinist = -1;
  695:     return(ret);
  696: }
  697: 
  698: /**
  699:  * xmlRegNewRange:
  700:  * @ctxt:  the regexp parser context
  701:  * @neg:  is that negative
  702:  * @type:  the type of range
  703:  * @start:  the start codepoint
  704:  * @end:  the end codepoint
  705:  *
  706:  * Allocate a new regexp range
  707:  *
  708:  * Returns the new range or NULL in case of error
  709:  */
  710: static xmlRegRangePtr
  711: xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
  712: 	       int neg, xmlRegAtomType type, int start, int end) {
  713:     xmlRegRangePtr ret;
  714: 
  715:     ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
  716:     if (ret == NULL) {
  717: 	xmlRegexpErrMemory(ctxt, "allocating range");
  718: 	return(NULL);
  719:     }
  720:     ret->neg = neg;
  721:     ret->type = type;
  722:     ret->start = start;
  723:     ret->end = end;
  724:     return(ret);
  725: }
  726: 
  727: /**
  728:  * xmlRegFreeRange:
  729:  * @range:  the regexp range
  730:  *
  731:  * Free a regexp range
  732:  */
  733: static void
  734: xmlRegFreeRange(xmlRegRangePtr range) {
  735:     if (range == NULL)
  736: 	return;
  737: 
  738:     if (range->blockName != NULL)
  739: 	xmlFree(range->blockName);
  740:     xmlFree(range);
  741: }
  742: 
  743: /**
  744:  * xmlRegCopyRange:
  745:  * @range:  the regexp range
  746:  *
  747:  * Copy a regexp range
  748:  *
  749:  * Returns the new copy or NULL in case of error.
  750:  */
  751: static xmlRegRangePtr
  752: xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
  753:     xmlRegRangePtr ret;
  754: 
  755:     if (range == NULL)
  756: 	return(NULL);
  757: 
  758:     ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
  759:                          range->end);
  760:     if (ret == NULL)
  761:         return(NULL);
  762:     if (range->blockName != NULL) {
  763: 	ret->blockName = xmlStrdup(range->blockName);
  764: 	if (ret->blockName == NULL) {
  765: 	    xmlRegexpErrMemory(ctxt, "allocating range");
  766: 	    xmlRegFreeRange(ret);
  767: 	    return(NULL);
  768: 	}
  769:     }
  770:     return(ret);
  771: }
  772: 
  773: /**
  774:  * xmlRegNewAtom:
  775:  * @ctxt:  the regexp parser context
  776:  * @type:  the type of atom
  777:  *
  778:  * Allocate a new atom
  779:  *
  780:  * Returns the new atom or NULL in case of error
  781:  */
  782: static xmlRegAtomPtr
  783: xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
  784:     xmlRegAtomPtr ret;
  785: 
  786:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  787:     if (ret == NULL) {
  788: 	xmlRegexpErrMemory(ctxt, "allocating atom");
  789: 	return(NULL);
  790:     }
  791:     memset(ret, 0, sizeof(xmlRegAtom));
  792:     ret->type = type;
  793:     ret->quant = XML_REGEXP_QUANT_ONCE;
  794:     ret->min = 0;
  795:     ret->max = 0;
  796:     return(ret);
  797: }
  798: 
  799: /**
  800:  * xmlRegFreeAtom:
  801:  * @atom:  the regexp atom
  802:  *
  803:  * Free a regexp atom
  804:  */
  805: static void
  806: xmlRegFreeAtom(xmlRegAtomPtr atom) {
  807:     int i;
  808: 
  809:     if (atom == NULL)
  810: 	return;
  811: 
  812:     for (i = 0;i < atom->nbRanges;i++)
  813: 	xmlRegFreeRange(atom->ranges[i]);
  814:     if (atom->ranges != NULL)
  815: 	xmlFree(atom->ranges);
  816:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
  817: 	xmlFree(atom->valuep);
  818:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
  819: 	xmlFree(atom->valuep2);
  820:     if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
  821: 	xmlFree(atom->valuep);
  822:     xmlFree(atom);
  823: }
  824: 
  825: /**
  826:  * xmlRegCopyAtom:
  827:  * @ctxt:  the regexp parser context
  828:  * @atom:  the oiginal atom
  829:  *
  830:  * Allocate a new regexp range
  831:  *
  832:  * Returns the new atom or NULL in case of error
  833:  */
  834: static xmlRegAtomPtr
  835: xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
  836:     xmlRegAtomPtr ret;
  837: 
  838:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  839:     if (ret == NULL) {
  840: 	xmlRegexpErrMemory(ctxt, "copying atom");
  841: 	return(NULL);
  842:     }
  843:     memset(ret, 0, sizeof(xmlRegAtom));
  844:     ret->type = atom->type;
  845:     ret->quant = atom->quant;
  846:     ret->min = atom->min;
  847:     ret->max = atom->max;
  848:     if (atom->nbRanges > 0) {
  849:         int i;
  850: 
  851:         ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
  852: 	                                           atom->nbRanges);
  853: 	if (ret->ranges == NULL) {
  854: 	    xmlRegexpErrMemory(ctxt, "copying atom");
  855: 	    goto error;
  856: 	}
  857: 	for (i = 0;i < atom->nbRanges;i++) {
  858: 	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
  859: 	    if (ret->ranges[i] == NULL)
  860: 	        goto error;
  861: 	    ret->nbRanges = i + 1;
  862: 	}
  863:     }
  864:     return(ret);
  865: 
  866: error:
  867:     xmlRegFreeAtom(ret);
  868:     return(NULL);
  869: }
  870: 
  871: static xmlRegStatePtr
  872: xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
  873:     xmlRegStatePtr ret;
  874: 
  875:     ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
  876:     if (ret == NULL) {
  877: 	xmlRegexpErrMemory(ctxt, "allocating state");
  878: 	return(NULL);
  879:     }
  880:     memset(ret, 0, sizeof(xmlRegState));
  881:     ret->type = XML_REGEXP_TRANS_STATE;
  882:     ret->mark = XML_REGEXP_MARK_NORMAL;
  883:     return(ret);
  884: }
  885: 
  886: /**
  887:  * xmlRegFreeState:
  888:  * @state:  the regexp state
  889:  *
  890:  * Free a regexp state
  891:  */
  892: static void
  893: xmlRegFreeState(xmlRegStatePtr state) {
  894:     if (state == NULL)
  895: 	return;
  896: 
  897:     if (state->trans != NULL)
  898: 	xmlFree(state->trans);
  899:     if (state->transTo != NULL)
  900: 	xmlFree(state->transTo);
  901:     xmlFree(state);
  902: }
  903: 
  904: /**
  905:  * xmlRegFreeParserCtxt:
  906:  * @ctxt:  the regexp parser context
  907:  *
  908:  * Free a regexp parser context
  909:  */
  910: static void
  911: xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  912:     int i;
  913:     if (ctxt == NULL)
  914: 	return;
  915: 
  916:     if (ctxt->string != NULL)
  917: 	xmlFree(ctxt->string);
  918:     if (ctxt->states != NULL) {
  919: 	for (i = 0;i < ctxt->nbStates;i++)
  920: 	    xmlRegFreeState(ctxt->states[i]);
  921: 	xmlFree(ctxt->states);
  922:     }
  923:     if (ctxt->atoms != NULL) {
  924: 	for (i = 0;i < ctxt->nbAtoms;i++)
  925: 	    xmlRegFreeAtom(ctxt->atoms[i]);
  926: 	xmlFree(ctxt->atoms);
  927:     }
  928:     if (ctxt->counters != NULL)
  929: 	xmlFree(ctxt->counters);
  930:     xmlFree(ctxt);
  931: }
  932: 
  933: /************************************************************************
  934:  * 									*
  935:  * 			Display of Data structures			*
  936:  * 									*
  937:  ************************************************************************/
  938: 
  939: static void
  940: xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
  941:     switch (type) {
  942:         case XML_REGEXP_EPSILON:
  943: 	    fprintf(output, "epsilon "); break;
  944:         case XML_REGEXP_CHARVAL:
  945: 	    fprintf(output, "charval "); break;
  946:         case XML_REGEXP_RANGES:
  947: 	    fprintf(output, "ranges "); break;
  948:         case XML_REGEXP_SUBREG:
  949: 	    fprintf(output, "subexpr "); break;
  950:         case XML_REGEXP_STRING:
  951: 	    fprintf(output, "string "); break;
  952:         case XML_REGEXP_ANYCHAR:
  953: 	    fprintf(output, "anychar "); break;
  954:         case XML_REGEXP_ANYSPACE:
  955: 	    fprintf(output, "anyspace "); break;
  956:         case XML_REGEXP_NOTSPACE:
  957: 	    fprintf(output, "notspace "); break;
  958:         case XML_REGEXP_INITNAME:
  959: 	    fprintf(output, "initname "); break;
  960:         case XML_REGEXP_NOTINITNAME:
  961: 	    fprintf(output, "notinitname "); break;
  962:         case XML_REGEXP_NAMECHAR:
  963: 	    fprintf(output, "namechar "); break;
  964:         case XML_REGEXP_NOTNAMECHAR:
  965: 	    fprintf(output, "notnamechar "); break;
  966:         case XML_REGEXP_DECIMAL:
  967: 	    fprintf(output, "decimal "); break;
  968:         case XML_REGEXP_NOTDECIMAL:
  969: 	    fprintf(output, "notdecimal "); break;
  970:         case XML_REGEXP_REALCHAR:
  971: 	    fprintf(output, "realchar "); break;
  972:         case XML_REGEXP_NOTREALCHAR:
  973: 	    fprintf(output, "notrealchar "); break;
  974:         case XML_REGEXP_LETTER:
  975:             fprintf(output, "LETTER "); break;
  976:         case XML_REGEXP_LETTER_UPPERCASE:
  977:             fprintf(output, "LETTER_UPPERCASE "); break;
  978:         case XML_REGEXP_LETTER_LOWERCASE:
  979:             fprintf(output, "LETTER_LOWERCASE "); break;
  980:         case XML_REGEXP_LETTER_TITLECASE:
  981:             fprintf(output, "LETTER_TITLECASE "); break;
  982:         case XML_REGEXP_LETTER_MODIFIER:
  983:             fprintf(output, "LETTER_MODIFIER "); break;
  984:         case XML_REGEXP_LETTER_OTHERS:
  985:             fprintf(output, "LETTER_OTHERS "); break;
  986:         case XML_REGEXP_MARK:
  987:             fprintf(output, "MARK "); break;
  988:         case XML_REGEXP_MARK_NONSPACING:
  989:             fprintf(output, "MARK_NONSPACING "); break;
  990:         case XML_REGEXP_MARK_SPACECOMBINING:
  991:             fprintf(output, "MARK_SPACECOMBINING "); break;
  992:         case XML_REGEXP_MARK_ENCLOSING:
  993:             fprintf(output, "MARK_ENCLOSING "); break;
  994:         case XML_REGEXP_NUMBER:
  995:             fprintf(output, "NUMBER "); break;
  996:         case XML_REGEXP_NUMBER_DECIMAL:
  997:             fprintf(output, "NUMBER_DECIMAL "); break;
  998:         case XML_REGEXP_NUMBER_LETTER:
  999:             fprintf(output, "NUMBER_LETTER "); break;
 1000:         case XML_REGEXP_NUMBER_OTHERS:
 1001:             fprintf(output, "NUMBER_OTHERS "); break;
 1002:         case XML_REGEXP_PUNCT:
 1003:             fprintf(output, "PUNCT "); break;
 1004:         case XML_REGEXP_PUNCT_CONNECTOR:
 1005:             fprintf(output, "PUNCT_CONNECTOR "); break;
 1006:         case XML_REGEXP_PUNCT_DASH:
 1007:             fprintf(output, "PUNCT_DASH "); break;
 1008:         case XML_REGEXP_PUNCT_OPEN:
 1009:             fprintf(output, "PUNCT_OPEN "); break;
 1010:         case XML_REGEXP_PUNCT_CLOSE:
 1011:             fprintf(output, "PUNCT_CLOSE "); break;
 1012:         case XML_REGEXP_PUNCT_INITQUOTE:
 1013:             fprintf(output, "PUNCT_INITQUOTE "); break;
 1014:         case XML_REGEXP_PUNCT_FINQUOTE:
 1015:             fprintf(output, "PUNCT_FINQUOTE "); break;
 1016:         case XML_REGEXP_PUNCT_OTHERS:
 1017:             fprintf(output, "PUNCT_OTHERS "); break;
 1018:         case XML_REGEXP_SEPAR:
 1019:             fprintf(output, "SEPAR "); break;
 1020:         case XML_REGEXP_SEPAR_SPACE:
 1021:             fprintf(output, "SEPAR_SPACE "); break;
 1022:         case XML_REGEXP_SEPAR_LINE:
 1023:             fprintf(output, "SEPAR_LINE "); break;
 1024:         case XML_REGEXP_SEPAR_PARA:
 1025:             fprintf(output, "SEPAR_PARA "); break;
 1026:         case XML_REGEXP_SYMBOL:
 1027:             fprintf(output, "SYMBOL "); break;
 1028:         case XML_REGEXP_SYMBOL_MATH:
 1029:             fprintf(output, "SYMBOL_MATH "); break;
 1030:         case XML_REGEXP_SYMBOL_CURRENCY:
 1031:             fprintf(output, "SYMBOL_CURRENCY "); break;
 1032:         case XML_REGEXP_SYMBOL_MODIFIER:
 1033:             fprintf(output, "SYMBOL_MODIFIER "); break;
 1034:         case XML_REGEXP_SYMBOL_OTHERS:
 1035:             fprintf(output, "SYMBOL_OTHERS "); break;
 1036:         case XML_REGEXP_OTHER:
 1037:             fprintf(output, "OTHER "); break;
 1038:         case XML_REGEXP_OTHER_CONTROL:
 1039:             fprintf(output, "OTHER_CONTROL "); break;
 1040:         case XML_REGEXP_OTHER_FORMAT:
 1041:             fprintf(output, "OTHER_FORMAT "); break;
 1042:         case XML_REGEXP_OTHER_PRIVATE:
 1043:             fprintf(output, "OTHER_PRIVATE "); break;
 1044:         case XML_REGEXP_OTHER_NA:
 1045:             fprintf(output, "OTHER_NA "); break;
 1046:         case XML_REGEXP_BLOCK_NAME:
 1047: 	    fprintf(output, "BLOCK "); break;
 1048:     }
 1049: }
 1050: 
 1051: static void
 1052: xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
 1053:     switch (type) {
 1054:         case XML_REGEXP_QUANT_EPSILON:
 1055: 	    fprintf(output, "epsilon "); break;
 1056:         case XML_REGEXP_QUANT_ONCE:
 1057: 	    fprintf(output, "once "); break;
 1058:         case XML_REGEXP_QUANT_OPT:
 1059: 	    fprintf(output, "? "); break;
 1060:         case XML_REGEXP_QUANT_MULT:
 1061: 	    fprintf(output, "* "); break;
 1062:         case XML_REGEXP_QUANT_PLUS:
 1063: 	    fprintf(output, "+ "); break;
 1064: 	case XML_REGEXP_QUANT_RANGE:
 1065: 	    fprintf(output, "range "); break;
 1066: 	case XML_REGEXP_QUANT_ONCEONLY:
 1067: 	    fprintf(output, "onceonly "); break;
 1068: 	case XML_REGEXP_QUANT_ALL:
 1069: 	    fprintf(output, "all "); break;
 1070:     }
 1071: }
 1072: static void
 1073: xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
 1074:     fprintf(output, "  range: ");
 1075:     if (range->neg)
 1076: 	fprintf(output, "negative ");
 1077:     xmlRegPrintAtomType(output, range->type);
 1078:     fprintf(output, "%c - %c\n", range->start, range->end);
 1079: }
 1080: 
 1081: static void
 1082: xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
 1083:     fprintf(output, " atom: ");
 1084:     if (atom == NULL) {
 1085: 	fprintf(output, "NULL\n");
 1086: 	return;
 1087:     }
 1088:     if (atom->neg)
 1089:         fprintf(output, "not ");
 1090:     xmlRegPrintAtomType(output, atom->type);
 1091:     xmlRegPrintQuantType(output, atom->quant);
 1092:     if (atom->quant == XML_REGEXP_QUANT_RANGE)
 1093: 	fprintf(output, "%d-%d ", atom->min, atom->max);
 1094:     if (atom->type == XML_REGEXP_STRING)
 1095: 	fprintf(output, "'%s' ", (char *) atom->valuep);
 1096:     if (atom->type == XML_REGEXP_CHARVAL)
 1097: 	fprintf(output, "char %c\n", atom->codepoint);
 1098:     else if (atom->type == XML_REGEXP_RANGES) {
 1099: 	int i;
 1100: 	fprintf(output, "%d entries\n", atom->nbRanges);
 1101: 	for (i = 0; i < atom->nbRanges;i++)
 1102: 	    xmlRegPrintRange(output, atom->ranges[i]);
 1103:     } else if (atom->type == XML_REGEXP_SUBREG) {
 1104: 	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
 1105:     } else {
 1106: 	fprintf(output, "\n");
 1107:     }
 1108: }
 1109: 
 1110: static void
 1111: xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
 1112:     fprintf(output, "  trans: ");
 1113:     if (trans == NULL) {
 1114: 	fprintf(output, "NULL\n");
 1115: 	return;
 1116:     }
 1117:     if (trans->to < 0) {
 1118: 	fprintf(output, "removed\n");
 1119: 	return;
 1120:     }
 1121:     if (trans->nd != 0) {
 1122: 	if (trans->nd == 2)
 1123: 	    fprintf(output, "last not determinist, ");
 1124: 	else
 1125: 	    fprintf(output, "not determinist, ");
 1126:     }
 1127:     if (trans->counter >= 0) {
 1128: 	fprintf(output, "counted %d, ", trans->counter);
 1129:     }
 1130:     if (trans->count == REGEXP_ALL_COUNTER) {
 1131: 	fprintf(output, "all transition, ");
 1132:     } else if (trans->count >= 0) {
 1133: 	fprintf(output, "count based %d, ", trans->count);
 1134:     }
 1135:     if (trans->atom == NULL) {
 1136: 	fprintf(output, "epsilon to %d\n", trans->to);
 1137: 	return;
 1138:     }
 1139:     if (trans->atom->type == XML_REGEXP_CHARVAL)
 1140: 	fprintf(output, "char %c ", trans->atom->codepoint);
 1141:     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
 1142: }
 1143:     
 1144: static void
 1145: xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
 1146:     int i;
 1147: 
 1148:     fprintf(output, " state: ");
 1149:     if (state == NULL) {
 1150: 	fprintf(output, "NULL\n");
 1151: 	return;
 1152:     }
 1153:     if (state->type == XML_REGEXP_START_STATE)
 1154: 	fprintf(output, "START ");
 1155:     if (state->type == XML_REGEXP_FINAL_STATE)
 1156: 	fprintf(output, "FINAL ");
 1157:     
 1158:     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
 1159:     for (i = 0;i < state->nbTrans; i++) {
 1160: 	xmlRegPrintTrans(output, &(state->trans[i]));
 1161:     }
 1162: }
 1163: 
 1164: #ifdef DEBUG_REGEXP_GRAPH
 1165: static void
 1166: xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
 1167:     int i;
 1168: 
 1169:     fprintf(output, " ctxt: ");
 1170:     if (ctxt == NULL) {
 1171: 	fprintf(output, "NULL\n");
 1172: 	return;
 1173:     }
 1174:     fprintf(output, "'%s' ", ctxt->string);
 1175:     if (ctxt->error)
 1176: 	fprintf(output, "error ");
 1177:     if (ctxt->neg)
 1178: 	fprintf(output, "neg ");
 1179:     fprintf(output, "\n");
 1180:     fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
 1181:     for (i = 0;i < ctxt->nbAtoms; i++) {
 1182: 	fprintf(output, " %02d ", i);
 1183: 	xmlRegPrintAtom(output, ctxt->atoms[i]);
 1184:     }
 1185:     if (ctxt->atom != NULL) {
 1186: 	fprintf(output, "current atom:\n");
 1187: 	xmlRegPrintAtom(output, ctxt->atom);
 1188:     }
 1189:     fprintf(output, "%d states:", ctxt->nbStates);
 1190:     if (ctxt->start != NULL)
 1191: 	fprintf(output, " start: %d", ctxt->start->no);
 1192:     if (ctxt->end != NULL)
 1193: 	fprintf(output, " end: %d", ctxt->end->no);
 1194:     fprintf(output, "\n");
 1195:     for (i = 0;i < ctxt->nbStates; i++) {
 1196: 	xmlRegPrintState(output, ctxt->states[i]);
 1197:     }
 1198:     fprintf(output, "%d counters:\n", ctxt->nbCounters);
 1199:     for (i = 0;i < ctxt->nbCounters; i++) {
 1200: 	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
 1201: 		                                ctxt->counters[i].max);
 1202:     }
 1203: }
 1204: #endif
 1205: 
 1206: /************************************************************************
 1207:  * 									*
 1208:  *		 Finite Automata structures manipulations		*
 1209:  * 									*
 1210:  ************************************************************************/
 1211: 
 1212: static void 
 1213: xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
 1214: 	           int neg, xmlRegAtomType type, int start, int end,
 1215: 		   xmlChar *blockName) {
 1216:     xmlRegRangePtr range;
 1217: 
 1218:     if (atom == NULL) {
 1219: 	ERROR("add range: atom is NULL");
 1220: 	return;
 1221:     }
 1222:     if (atom->type != XML_REGEXP_RANGES) {
 1223: 	ERROR("add range: atom is not ranges");
 1224: 	return;
 1225:     }
 1226:     if (atom->maxRanges == 0) {
 1227: 	atom->maxRanges = 4;
 1228: 	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
 1229: 		                             sizeof(xmlRegRangePtr));
 1230: 	if (atom->ranges == NULL) {
 1231: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1232: 	    atom->maxRanges = 0;
 1233: 	    return;
 1234: 	}
 1235:     } else if (atom->nbRanges >= atom->maxRanges) {
 1236: 	xmlRegRangePtr *tmp;
 1237: 	atom->maxRanges *= 2;
 1238: 	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
 1239: 		                             sizeof(xmlRegRangePtr));
 1240: 	if (tmp == NULL) {
 1241: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1242: 	    atom->maxRanges /= 2;
 1243: 	    return;
 1244: 	}
 1245: 	atom->ranges = tmp;
 1246:     }
 1247:     range = xmlRegNewRange(ctxt, neg, type, start, end);
 1248:     if (range == NULL)
 1249: 	return;
 1250:     range->blockName = blockName;
 1251:     atom->ranges[atom->nbRanges++] = range;
 1252:     
 1253: }
 1254: 
 1255: static int
 1256: xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
 1257:     if (ctxt->maxCounters == 0) {
 1258: 	ctxt->maxCounters = 4;
 1259: 	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
 1260: 		                             sizeof(xmlRegCounter));
 1261: 	if (ctxt->counters == NULL) {
 1262: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1263: 	    ctxt->maxCounters = 0;
 1264: 	    return(-1);
 1265: 	}
 1266:     } else if (ctxt->nbCounters >= ctxt->maxCounters) {
 1267: 	xmlRegCounter *tmp;
 1268: 	ctxt->maxCounters *= 2;
 1269: 	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
 1270: 		                           sizeof(xmlRegCounter));
 1271: 	if (tmp == NULL) {
 1272: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1273: 	    ctxt->maxCounters /= 2;
 1274: 	    return(-1);
 1275: 	}
 1276: 	ctxt->counters = tmp;
 1277:     }
 1278:     ctxt->counters[ctxt->nbCounters].min = -1;
 1279:     ctxt->counters[ctxt->nbCounters].max = -1;
 1280:     return(ctxt->nbCounters++);
 1281: }
 1282: 
 1283: static int 
 1284: xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
 1285:     if (atom == NULL) {
 1286: 	ERROR("atom push: atom is NULL");
 1287: 	return(-1);
 1288:     }
 1289:     if (ctxt->maxAtoms == 0) {
 1290: 	ctxt->maxAtoms = 4;
 1291: 	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
 1292: 		                             sizeof(xmlRegAtomPtr));
 1293: 	if (ctxt->atoms == NULL) {
 1294: 	    xmlRegexpErrMemory(ctxt, "pushing atom");
 1295: 	    ctxt->maxAtoms = 0;
 1296: 	    return(-1);
 1297: 	}
 1298:     } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
 1299: 	xmlRegAtomPtr *tmp;
 1300: 	ctxt->maxAtoms *= 2;
 1301: 	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
 1302: 		                             sizeof(xmlRegAtomPtr));
 1303: 	if (tmp == NULL) {
 1304: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1305: 	    ctxt->maxAtoms /= 2;
 1306: 	    return(-1);
 1307: 	}
 1308: 	ctxt->atoms = tmp;
 1309:     }
 1310:     atom->no = ctxt->nbAtoms;
 1311:     ctxt->atoms[ctxt->nbAtoms++] = atom;
 1312:     return(0);
 1313: }
 1314: 
 1315: static void 
 1316: xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
 1317:                       int from) {
 1318:     if (target->maxTransTo == 0) {
 1319: 	target->maxTransTo = 8;
 1320: 	target->transTo = (int *) xmlMalloc(target->maxTransTo *
 1321: 		                             sizeof(int));
 1322: 	if (target->transTo == NULL) {
 1323: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1324: 	    target->maxTransTo = 0;
 1325: 	    return;
 1326: 	}
 1327:     } else if (target->nbTransTo >= target->maxTransTo) {
 1328: 	int *tmp;
 1329: 	target->maxTransTo *= 2;
 1330: 	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
 1331: 		                             sizeof(int));
 1332: 	if (tmp == NULL) {
 1333: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1334: 	    target->maxTransTo /= 2;
 1335: 	    return;
 1336: 	}
 1337: 	target->transTo = tmp;
 1338:     }
 1339:     target->transTo[target->nbTransTo] = from;
 1340:     target->nbTransTo++;
 1341: }
 1342: 
 1343: static void 
 1344: xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 1345: 	            xmlRegAtomPtr atom, xmlRegStatePtr target,
 1346: 		    int counter, int count) {
 1347: 
 1348:     int nrtrans;
 1349: 
 1350:     if (state == NULL) {
 1351: 	ERROR("add state: state is NULL");
 1352: 	return;
 1353:     }
 1354:     if (target == NULL) {
 1355: 	ERROR("add state: target is NULL");
 1356: 	return;
 1357:     }
 1358:     /*
 1359:      * Other routines follow the philosophy 'When in doubt, add a transition'
 1360:      * so we check here whether such a transition is already present and, if
 1361:      * so, silently ignore this request.
 1362:      */
 1363: 
 1364:     for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
 1365: 	xmlRegTransPtr trans = &(state->trans[nrtrans]);
 1366: 	if ((trans->atom == atom) &&
 1367: 	    (trans->to == target->no) &&
 1368: 	    (trans->counter == counter) &&
 1369: 	    (trans->count == count)) {
 1370: #ifdef DEBUG_REGEXP_GRAPH
 1371: 	    printf("Ignoring duplicate transition from %d to %d\n",
 1372: 		    state->no, target->no);
 1373: #endif
 1374: 	    return;
 1375: 	}
 1376:     }
 1377: 
 1378:     if (state->maxTrans == 0) {
 1379: 	state->maxTrans = 8;
 1380: 	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
 1381: 		                             sizeof(xmlRegTrans));
 1382: 	if (state->trans == NULL) {
 1383: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1384: 	    state->maxTrans = 0;
 1385: 	    return;
 1386: 	}
 1387:     } else if (state->nbTrans >= state->maxTrans) {
 1388: 	xmlRegTrans *tmp;
 1389: 	state->maxTrans *= 2;
 1390: 	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
 1391: 		                             sizeof(xmlRegTrans));
 1392: 	if (tmp == NULL) {
 1393: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1394: 	    state->maxTrans /= 2;
 1395: 	    return;
 1396: 	}
 1397: 	state->trans = tmp;
 1398:     }
 1399: #ifdef DEBUG_REGEXP_GRAPH
 1400:     printf("Add trans from %d to %d ", state->no, target->no);
 1401:     if (count == REGEXP_ALL_COUNTER)
 1402: 	printf("all transition\n");
 1403:     else if (count >= 0)
 1404: 	printf("count based %d\n", count);
 1405:     else if (counter >= 0)
 1406: 	printf("counted %d\n", counter);
 1407:     else if (atom == NULL)
 1408: 	printf("epsilon transition\n");
 1409:     else if (atom != NULL) 
 1410:         xmlRegPrintAtom(stdout, atom);
 1411: #endif
 1412: 
 1413:     state->trans[state->nbTrans].atom = atom;
 1414:     state->trans[state->nbTrans].to = target->no;
 1415:     state->trans[state->nbTrans].counter = counter;
 1416:     state->trans[state->nbTrans].count = count;
 1417:     state->trans[state->nbTrans].nd = 0;
 1418:     state->nbTrans++;
 1419:     xmlRegStateAddTransTo(ctxt, target, state->no);
 1420: }
 1421: 
 1422: static int
 1423: xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
 1424:     if (state == NULL) return(-1);
 1425:     if (ctxt->maxStates == 0) {
 1426: 	ctxt->maxStates = 4;
 1427: 	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
 1428: 		                             sizeof(xmlRegStatePtr));
 1429: 	if (ctxt->states == NULL) {
 1430: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1431: 	    ctxt->maxStates = 0;
 1432: 	    return(-1);
 1433: 	}
 1434:     } else if (ctxt->nbStates >= ctxt->maxStates) {
 1435: 	xmlRegStatePtr *tmp;
 1436: 	ctxt->maxStates *= 2;
 1437: 	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
 1438: 		                             sizeof(xmlRegStatePtr));
 1439: 	if (tmp == NULL) {
 1440: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1441: 	    ctxt->maxStates /= 2;
 1442: 	    return(-1);
 1443: 	}
 1444: 	ctxt->states = tmp;
 1445:     }
 1446:     state->no = ctxt->nbStates;
 1447:     ctxt->states[ctxt->nbStates++] = state;
 1448:     return(0);
 1449: }
 1450: 
 1451: /**
 1452:  * xmlFAGenerateAllTransition:
 1453:  * @ctxt:  a regexp parser context
 1454:  * @from:  the from state
 1455:  * @to:  the target state or NULL for building a new one
 1456:  * @lax:
 1457:  *
 1458:  */
 1459: static void
 1460: xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
 1461: 			   xmlRegStatePtr from, xmlRegStatePtr to,
 1462: 			   int lax) {
 1463:     if (to == NULL) {
 1464: 	to = xmlRegNewState(ctxt);
 1465: 	xmlRegStatePush(ctxt, to);
 1466: 	ctxt->state = to;
 1467:     }
 1468:     if (lax)
 1469: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
 1470:     else
 1471: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
 1472: }
 1473: 
 1474: /**
 1475:  * xmlFAGenerateEpsilonTransition:
 1476:  * @ctxt:  a regexp parser context
 1477:  * @from:  the from state
 1478:  * @to:  the target state or NULL for building a new one
 1479:  *
 1480:  */
 1481: static void
 1482: xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1483: 			       xmlRegStatePtr from, xmlRegStatePtr to) {
 1484:     if (to == NULL) {
 1485: 	to = xmlRegNewState(ctxt);
 1486: 	xmlRegStatePush(ctxt, to);
 1487: 	ctxt->state = to;
 1488:     }
 1489:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
 1490: }
 1491: 
 1492: /**
 1493:  * xmlFAGenerateCountedEpsilonTransition:
 1494:  * @ctxt:  a regexp parser context
 1495:  * @from:  the from state
 1496:  * @to:  the target state or NULL for building a new one
 1497:  * counter:  the counter for that transition
 1498:  *
 1499:  */
 1500: static void
 1501: xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1502: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1503:     if (to == NULL) {
 1504: 	to = xmlRegNewState(ctxt);
 1505: 	xmlRegStatePush(ctxt, to);
 1506: 	ctxt->state = to;
 1507:     }
 1508:     xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
 1509: }
 1510: 
 1511: /**
 1512:  * xmlFAGenerateCountedTransition:
 1513:  * @ctxt:  a regexp parser context
 1514:  * @from:  the from state
 1515:  * @to:  the target state or NULL for building a new one
 1516:  * counter:  the counter for that transition
 1517:  *
 1518:  */
 1519: static void
 1520: xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
 1521: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1522:     if (to == NULL) {
 1523: 	to = xmlRegNewState(ctxt);
 1524: 	xmlRegStatePush(ctxt, to);
 1525: 	ctxt->state = to;
 1526:     }
 1527:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
 1528: }
 1529: 
 1530: /**
 1531:  * xmlFAGenerateTransitions:
 1532:  * @ctxt:  a regexp parser context
 1533:  * @from:  the from state
 1534:  * @to:  the target state or NULL for building a new one
 1535:  * @atom:  the atom generating the transition
 1536:  *
 1537:  * Returns 0 if success and -1 in case of error.
 1538:  */
 1539: static int
 1540: xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 1541: 	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
 1542:     xmlRegStatePtr end;
 1543: 
 1544:     if (atom == NULL) {
 1545: 	ERROR("genrate transition: atom == NULL");
 1546: 	return(-1);
 1547:     }
 1548:     if (atom->type == XML_REGEXP_SUBREG) {
 1549: 	/*
 1550: 	 * this is a subexpression handling one should not need to
 1551: 	 * create a new node except for XML_REGEXP_QUANT_RANGE.
 1552: 	 */
 1553: 	if (xmlRegAtomPush(ctxt, atom) < 0) {
 1554: 	    return(-1);
 1555: 	}
 1556: 	if ((to != NULL) && (atom->stop != to) &&
 1557: 	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
 1558: 	    /*
 1559: 	     * Generate an epsilon transition to link to the target
 1560: 	     */
 1561: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1562: #ifdef DV
 1563: 	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
 1564: 		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
 1565: 	    to = xmlRegNewState(ctxt);
 1566: 	    xmlRegStatePush(ctxt, to);
 1567: 	    ctxt->state = to;
 1568: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1569: #endif
 1570: 	}
 1571: 	switch (atom->quant) {
 1572: 	    case XML_REGEXP_QUANT_OPT:
 1573: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1574: 		/*
 1575: 		 * transition done to the state after end of atom.
 1576: 		 *      1. set transition from atom start to new state
 1577: 		 *      2. set transition from atom end to this state. 
 1578: 		 */
 1579:                 if (to == NULL) {
 1580:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
 1581:                     xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
 1582:                                                    ctxt->state);
 1583:                 } else {
 1584:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
 1585:                 }
 1586: 		break;
 1587: 	    case XML_REGEXP_QUANT_MULT:
 1588: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1589: 		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
 1590: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1591: 		break;
 1592: 	    case XML_REGEXP_QUANT_PLUS:
 1593: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1594: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1595: 		break;
 1596: 	    case XML_REGEXP_QUANT_RANGE: {
 1597: 		int counter;
 1598: 		xmlRegStatePtr inter, newstate;
 1599: 
 1600: 		/*
 1601: 		 * create the final state now if needed
 1602: 		 */
 1603: 		if (to != NULL) {
 1604: 		    newstate = to;
 1605: 		} else {
 1606: 		    newstate = xmlRegNewState(ctxt);
 1607: 		    xmlRegStatePush(ctxt, newstate);
 1608: 		}
 1609: 
 1610: 		/*
 1611: 		 * The principle here is to use counted transition
 1612: 		 * to avoid explosion in the number of states in the
 1613: 		 * graph. This is clearly more complex but should not
 1614: 		 * be exploitable at runtime.
 1615: 		 */
 1616: 		if ((atom->min == 0) && (atom->start0 == NULL)) {
 1617: 		    xmlRegAtomPtr copy;
 1618: 		    /*
 1619: 		     * duplicate a transition based on atom to count next
 1620: 		     * occurences after 1. We cannot loop to atom->start
 1621: 		     * directly because we need an epsilon transition to 
 1622: 		     * newstate.
 1623: 		     */
 1624: 		     /* ???? For some reason it seems we never reach that
 1625: 		        case, I suppose this got optimized out before when
 1626: 			building the automata */
 1627: 		    copy = xmlRegCopyAtom(ctxt, atom);
 1628: 		    if (copy == NULL)
 1629: 		        return(-1);
 1630: 		    copy->quant = XML_REGEXP_QUANT_ONCE;
 1631: 		    copy->min = 0;
 1632: 		    copy->max = 0;
 1633: 
 1634: 		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
 1635: 		        < 0)
 1636: 			return(-1);
 1637: 		    inter = ctxt->state;
 1638: 		    counter = xmlRegGetCounter(ctxt);
 1639: 		    ctxt->counters[counter].min = atom->min - 1;
 1640: 		    ctxt->counters[counter].max = atom->max - 1;
 1641: 		    /* count the number of times we see it again */
 1642: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
 1643: 						   atom->stop, counter);
 1644: 		    /* allow a way out based on the count */
 1645: 		    xmlFAGenerateCountedTransition(ctxt, inter,
 1646: 			                           newstate, counter);
 1647: 		    /* and also allow a direct exit for 0 */
 1648: 		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
 1649: 		                                   newstate);
 1650: 		} else {
 1651: 		    /*
 1652: 		     * either we need the atom at least once or there
 1653: 		     * is an atom->start0 allowing to easilly plug the
 1654: 		     * epsilon transition.
 1655: 		     */
 1656: 		    counter = xmlRegGetCounter(ctxt);
 1657: 		    ctxt->counters[counter].min = atom->min - 1;
 1658: 		    ctxt->counters[counter].max = atom->max - 1;
 1659: 		    /* count the number of times we see it again */
 1660: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
 1661: 						   atom->start, counter);
 1662: 		    /* allow a way out based on the count */
 1663: 		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
 1664: 			                           newstate, counter);
 1665: 		    /* and if needed allow a direct exit for 0 */
 1666: 		    if (atom->min == 0)
 1667: 			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
 1668: 						       newstate);
 1669: 
 1670: 		}
 1671: 		atom->min = 0;
 1672: 		atom->max = 0;
 1673: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1674: 		ctxt->state = newstate;
 1675: 	    }
 1676: 	    default:
 1677: 		break;
 1678: 	}
 1679: 	return(0);
 1680:     } 
 1681:     if ((atom->min == 0) && (atom->max == 0) &&
 1682:                (atom->quant == XML_REGEXP_QUANT_RANGE)) {
 1683:         /*
 1684: 	 * we can discard the atom and generate an epsilon transition instead
 1685: 	 */
 1686: 	if (to == NULL) {
 1687: 	    to = xmlRegNewState(ctxt);
 1688: 	    if (to != NULL)
 1689: 		xmlRegStatePush(ctxt, to);
 1690: 	    else {
 1691: 		return(-1);
 1692: 	    }
 1693: 	}
 1694: 	xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1695: 	ctxt->state = to;
 1696: 	xmlRegFreeAtom(atom);
 1697: 	return(0);
 1698:     }
 1699:     if (to == NULL) {
 1700: 	to = xmlRegNewState(ctxt);
 1701: 	if (to != NULL)
 1702: 	    xmlRegStatePush(ctxt, to);
 1703: 	else {
 1704: 	    return(-1);
 1705: 	}
 1706:     } 
 1707:     end = to;
 1708:     if ((atom->quant == XML_REGEXP_QUANT_MULT) || 
 1709:         (atom->quant == XML_REGEXP_QUANT_PLUS)) {
 1710: 	/*
 1711: 	 * Do not pollute the target state by adding transitions from
 1712: 	 * it as it is likely to be the shared target of multiple branches.
 1713: 	 * So isolate with an epsilon transition.
 1714: 	 */
 1715:         xmlRegStatePtr tmp;
 1716: 	
 1717: 	tmp = xmlRegNewState(ctxt);
 1718: 	if (tmp != NULL)
 1719: 	    xmlRegStatePush(ctxt, tmp);
 1720: 	else {
 1721: 	    return(-1);
 1722: 	}
 1723: 	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
 1724: 	to = tmp;
 1725:     }
 1726:     if (xmlRegAtomPush(ctxt, atom) < 0) {
 1727: 	return(-1);
 1728:     }
 1729:     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
 1730:     ctxt->state = end;
 1731:     switch (atom->quant) {
 1732: 	case XML_REGEXP_QUANT_OPT:
 1733: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1734: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1735: 	    break;
 1736: 	case XML_REGEXP_QUANT_MULT:
 1737: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1738: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1739: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1740: 	    break;
 1741: 	case XML_REGEXP_QUANT_PLUS:
 1742: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1743: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1744: 	    break;
 1745: 	case XML_REGEXP_QUANT_RANGE: 
 1746: #if DV_test
 1747: 	    if (atom->min == 0) {
 1748: 		xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1749: 	    }
 1750: #endif
 1751: 	    break;
 1752: 	default:
 1753: 	    break;
 1754:     }
 1755:     return(0);
 1756: }
 1757: 
 1758: /**
 1759:  * xmlFAReduceEpsilonTransitions:
 1760:  * @ctxt:  a regexp parser context
 1761:  * @fromnr:  the from state
 1762:  * @tonr:  the to state 
 1763:  * @counter:  should that transition be associated to a counted
 1764:  *
 1765:  */
 1766: static void
 1767: xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
 1768: 	                      int tonr, int counter) {
 1769:     int transnr;
 1770:     xmlRegStatePtr from;
 1771:     xmlRegStatePtr to;
 1772: 
 1773: #ifdef DEBUG_REGEXP_GRAPH
 1774:     printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
 1775: #endif
 1776:     from = ctxt->states[fromnr];
 1777:     if (from == NULL)
 1778: 	return;
 1779:     to = ctxt->states[tonr];
 1780:     if (to == NULL)
 1781: 	return;
 1782:     if ((to->mark == XML_REGEXP_MARK_START) ||
 1783: 	(to->mark == XML_REGEXP_MARK_VISITED))
 1784: 	return;
 1785: 
 1786:     to->mark = XML_REGEXP_MARK_VISITED;
 1787:     if (to->type == XML_REGEXP_FINAL_STATE) {
 1788: #ifdef DEBUG_REGEXP_GRAPH
 1789: 	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
 1790: #endif
 1791: 	from->type = XML_REGEXP_FINAL_STATE;
 1792:     }
 1793:     for (transnr = 0;transnr < to->nbTrans;transnr++) {
 1794:         if (to->trans[transnr].to < 0)
 1795: 	    continue;
 1796: 	if (to->trans[transnr].atom == NULL) {
 1797: 	    /*
 1798: 	     * Don't remove counted transitions
 1799: 	     * Don't loop either
 1800: 	     */
 1801: 	    if (to->trans[transnr].to != fromnr) {
 1802: 		if (to->trans[transnr].count >= 0) {
 1803: 		    int newto = to->trans[transnr].to;
 1804: 
 1805: 		    xmlRegStateAddTrans(ctxt, from, NULL,
 1806: 					ctxt->states[newto], 
 1807: 					-1, to->trans[transnr].count);
 1808: 		} else {
 1809: #ifdef DEBUG_REGEXP_GRAPH
 1810: 		    printf("Found epsilon trans %d from %d to %d\n",
 1811: 			   transnr, tonr, to->trans[transnr].to);
 1812: #endif
 1813: 		    if (to->trans[transnr].counter >= 0) {
 1814: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1815: 					      to->trans[transnr].to,
 1816: 					      to->trans[transnr].counter);
 1817: 		    } else {
 1818: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1819: 					      to->trans[transnr].to,
 1820: 					      counter);
 1821: 		    }
 1822: 		}
 1823: 	    }
 1824: 	} else {
 1825: 	    int newto = to->trans[transnr].to;
 1826: 
 1827: 	    if (to->trans[transnr].counter >= 0) {
 1828: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
 1829: 				    ctxt->states[newto], 
 1830: 				    to->trans[transnr].counter, -1);
 1831: 	    } else {
 1832: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
 1833: 				    ctxt->states[newto], counter, -1);
 1834: 	    }
 1835: 	}
 1836:     }
 1837:     to->mark = XML_REGEXP_MARK_NORMAL;
 1838: }
 1839: 
 1840: /**
 1841:  * xmlFAEliminateSimpleEpsilonTransitions:
 1842:  * @ctxt:  a regexp parser context
 1843:  *
 1844:  * Eliminating general epsilon transitions can get costly in the general 
 1845:  * algorithm due to the large amount of generated new transitions and
 1846:  * associated comparisons. However for simple epsilon transition used just
 1847:  * to separate building blocks when generating the automata this can be
 1848:  * reduced to state elimination:
 1849:  *    - if there exists an epsilon from X to Y
 1850:  *    - if there is no other transition from X
 1851:  * then X and Y are semantically equivalent and X can be eliminated
 1852:  * If X is the start state then make Y the start state, else replace the
 1853:  * target of all transitions to X by transitions to Y.
 1854:  */
 1855: static void
 1856: xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 1857:     int statenr, i, j, newto;
 1858:     xmlRegStatePtr state, tmp;
 1859: 
 1860:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1861: 	state = ctxt->states[statenr];
 1862: 	if (state == NULL)
 1863: 	    continue;
 1864: 	if (state->nbTrans != 1)
 1865: 	    continue;
 1866: 	if (state->type == XML_REGEXP_UNREACH_STATE)
 1867: 	    continue;
 1868: 	/* is the only transition out a basic transition */
 1869: 	if ((state->trans[0].atom == NULL) &&
 1870: 	    (state->trans[0].to >= 0) &&
 1871: 	    (state->trans[0].to != statenr) &&
 1872: 	    (state->trans[0].counter < 0) &&
 1873: 	    (state->trans[0].count < 0)) {
 1874: 	    newto = state->trans[0].to;
 1875: 
 1876:             if (state->type == XML_REGEXP_START_STATE) {
 1877: #ifdef DEBUG_REGEXP_GRAPH
 1878: 		printf("Found simple epsilon trans from start %d to %d\n",
 1879: 		       statenr, newto);
 1880: #endif     
 1881:             } else {
 1882: #ifdef DEBUG_REGEXP_GRAPH
 1883: 		printf("Found simple epsilon trans from %d to %d\n",
 1884: 		       statenr, newto);
 1885: #endif     
 1886: 	        for (i = 0;i < state->nbTransTo;i++) {
 1887: 		    tmp = ctxt->states[state->transTo[i]];
 1888: 		    for (j = 0;j < tmp->nbTrans;j++) {
 1889: 			if (tmp->trans[j].to == statenr) {
 1890: #ifdef DEBUG_REGEXP_GRAPH
 1891: 			    printf("Changed transition %d on %d to go to %d\n",
 1892: 				   j, tmp->no, newto);
 1893: #endif     
 1894: 			    tmp->trans[j].to = -1;
 1895: 			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
 1896: 			    			ctxt->states[newto],
 1897: 					        tmp->trans[j].counter,
 1898: 						tmp->trans[j].count);
 1899: 			}
 1900: 		    }
 1901: 		}
 1902: 		if (state->type == XML_REGEXP_FINAL_STATE)
 1903: 		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
 1904: 		/* eliminate the transition completely */
 1905: 		state->nbTrans = 0;
 1906: 
 1907:                 state->type = XML_REGEXP_UNREACH_STATE;
 1908: 
 1909: 	    }
 1910:             
 1911: 	}
 1912:     }
 1913: }
 1914: /**
 1915:  * xmlFAEliminateEpsilonTransitions:
 1916:  * @ctxt:  a regexp parser context
 1917:  *
 1918:  */
 1919: static void
 1920: xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 1921:     int statenr, transnr;
 1922:     xmlRegStatePtr state;
 1923:     int has_epsilon;
 1924: 
 1925:     if (ctxt->states == NULL) return;
 1926: 
 1927:     /*
 1928:      * Eliminate simple epsilon transition and the associated unreachable
 1929:      * states.
 1930:      */
 1931:     xmlFAEliminateSimpleEpsilonTransitions(ctxt);
 1932:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1933: 	state = ctxt->states[statenr];
 1934: 	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
 1935: #ifdef DEBUG_REGEXP_GRAPH
 1936: 	    printf("Removed unreachable state %d\n", statenr);
 1937: #endif
 1938: 	    xmlRegFreeState(state);
 1939: 	    ctxt->states[statenr] = NULL;
 1940: 	}
 1941:     }
 1942: 
 1943:     has_epsilon = 0;
 1944: 
 1945:     /*
 1946:      * Build the completed transitions bypassing the epsilons
 1947:      * Use a marking algorithm to avoid loops
 1948:      * Mark sink states too.
 1949:      * Process from the latests states backward to the start when
 1950:      * there is long cascading epsilon chains this minimize the
 1951:      * recursions and transition compares when adding the new ones
 1952:      */
 1953:     for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
 1954: 	state = ctxt->states[statenr];
 1955: 	if (state == NULL)
 1956: 	    continue;
 1957: 	if ((state->nbTrans == 0) &&
 1958: 	    (state->type != XML_REGEXP_FINAL_STATE)) {
 1959: 	    state->type = XML_REGEXP_SINK_STATE;
 1960: 	}
 1961: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 1962: 	    if ((state->trans[transnr].atom == NULL) &&
 1963: 		(state->trans[transnr].to >= 0)) {
 1964: 		if (state->trans[transnr].to == statenr) {
 1965: 		    state->trans[transnr].to = -1;
 1966: #ifdef DEBUG_REGEXP_GRAPH
 1967: 		    printf("Removed loopback epsilon trans %d on %d\n",
 1968: 			   transnr, statenr);
 1969: #endif
 1970: 		} else if (state->trans[transnr].count < 0) {
 1971: 		    int newto = state->trans[transnr].to;
 1972: 
 1973: #ifdef DEBUG_REGEXP_GRAPH
 1974: 		    printf("Found epsilon trans %d from %d to %d\n",
 1975: 			   transnr, statenr, newto);
 1976: #endif
 1977: 		    has_epsilon = 1;
 1978: 		    state->trans[transnr].to = -2;
 1979: 		    state->mark = XML_REGEXP_MARK_START;
 1980: 		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
 1981: 				      newto, state->trans[transnr].counter);
 1982: 		    state->mark = XML_REGEXP_MARK_NORMAL;
 1983: #ifdef DEBUG_REGEXP_GRAPH
 1984: 		} else {
 1985: 		    printf("Found counted transition %d on %d\n",
 1986: 			   transnr, statenr);
 1987: #endif
 1988: 	        }
 1989: 	    }
 1990: 	}
 1991:     }
 1992:     /*
 1993:      * Eliminate the epsilon transitions
 1994:      */
 1995:     if (has_epsilon) {
 1996: 	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1997: 	    state = ctxt->states[statenr];
 1998: 	    if (state == NULL)
 1999: 		continue;
 2000: 	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2001: 		xmlRegTransPtr trans = &(state->trans[transnr]);
 2002: 		if ((trans->atom == NULL) &&
 2003: 		    (trans->count < 0) &&
 2004: 		    (trans->to >= 0)) {
 2005: 		    trans->to = -1;
 2006: 		}
 2007: 	    }
 2008: 	}
 2009:     }
 2010: 
 2011:     /*
 2012:      * Use this pass to detect unreachable states too
 2013:      */
 2014:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2015: 	state = ctxt->states[statenr];
 2016: 	if (state != NULL)
 2017: 	    state->reached = XML_REGEXP_MARK_NORMAL;
 2018:     }
 2019:     state = ctxt->states[0];
 2020:     if (state != NULL)
 2021: 	state->reached = XML_REGEXP_MARK_START;
 2022:     while (state != NULL) {
 2023: 	xmlRegStatePtr target = NULL;
 2024: 	state->reached = XML_REGEXP_MARK_VISITED;
 2025: 	/*
 2026: 	 * Mark all states reachable from the current reachable state
 2027: 	 */
 2028: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2029: 	    if ((state->trans[transnr].to >= 0) &&
 2030: 		((state->trans[transnr].atom != NULL) ||
 2031: 		 (state->trans[transnr].count >= 0))) {
 2032: 		int newto = state->trans[transnr].to;
 2033: 
 2034: 		if (ctxt->states[newto] == NULL)
 2035: 		    continue;
 2036: 		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
 2037: 		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
 2038: 		    target = ctxt->states[newto];
 2039: 		}
 2040: 	    }
 2041: 	}
 2042: 
 2043: 	/*
 2044: 	 * find the next accessible state not explored
 2045: 	 */
 2046: 	if (target == NULL) {
 2047: 	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
 2048: 		state = ctxt->states[statenr];
 2049: 		if ((state != NULL) && (state->reached ==
 2050: 			XML_REGEXP_MARK_START)) {
 2051: 		    target = state;
 2052: 		    break;
 2053: 		}
 2054: 	    }
 2055: 	}
 2056: 	state = target;
 2057:     }
 2058:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2059: 	state = ctxt->states[statenr];
 2060: 	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
 2061: #ifdef DEBUG_REGEXP_GRAPH
 2062: 	    printf("Removed unreachable state %d\n", statenr);
 2063: #endif
 2064: 	    xmlRegFreeState(state);
 2065: 	    ctxt->states[statenr] = NULL;
 2066: 	}
 2067:     }
 2068: 
 2069: }
 2070: 
 2071: static int
 2072: xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
 2073:     int ret = 0;
 2074: 
 2075:     if ((range1->type == XML_REGEXP_RANGES) ||
 2076:         (range2->type == XML_REGEXP_RANGES) ||
 2077:         (range2->type == XML_REGEXP_SUBREG) ||
 2078:         (range1->type == XML_REGEXP_SUBREG) ||
 2079:         (range1->type == XML_REGEXP_STRING) ||
 2080:         (range2->type == XML_REGEXP_STRING))
 2081: 	return(-1);
 2082: 
 2083:     /* put them in order */
 2084:     if (range1->type > range2->type) {
 2085:         xmlRegRangePtr tmp;
 2086: 
 2087: 	tmp = range1;
 2088: 	range1 = range2;
 2089: 	range2 = tmp;
 2090:     }
 2091:     if ((range1->type == XML_REGEXP_ANYCHAR) ||
 2092:         (range2->type == XML_REGEXP_ANYCHAR)) {
 2093: 	ret = 1;
 2094:     } else if ((range1->type == XML_REGEXP_EPSILON) ||
 2095:                (range2->type == XML_REGEXP_EPSILON)) {
 2096: 	return(0);
 2097:     } else if (range1->type == range2->type) {
 2098:         if (range1->type != XML_REGEXP_CHARVAL)
 2099:             ret = 1;
 2100:         else if ((range1->end < range2->start) ||
 2101: 	         (range2->end < range1->start))
 2102: 	    ret = 0;
 2103: 	else
 2104: 	    ret = 1;
 2105:     } else if (range1->type == XML_REGEXP_CHARVAL) {
 2106:         int codepoint;
 2107: 	int neg = 0;
 2108: 
 2109: 	/*
 2110: 	 * just check all codepoints in the range for acceptance,
 2111: 	 * this is usually way cheaper since done only once at
 2112: 	 * compilation than testing over and over at runtime or 
 2113: 	 * pushing too many states when evaluating.
 2114: 	 */
 2115: 	if (((range1->neg == 0) && (range2->neg != 0)) ||
 2116: 	    ((range1->neg != 0) && (range2->neg == 0)))
 2117: 	    neg = 1;
 2118: 
 2119: 	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
 2120: 	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
 2121: 					    0, range2->start, range2->end,
 2122: 					    range2->blockName);
 2123: 	    if (ret < 0)
 2124: 	        return(-1);
 2125: 	    if (((neg == 1) && (ret == 0)) ||
 2126: 	        ((neg == 0) && (ret == 1)))
 2127: 		return(1);
 2128: 	}
 2129: 	return(0);
 2130:     } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
 2131:                (range2->type == XML_REGEXP_BLOCK_NAME)) {
 2132: 	if (range1->type == range2->type) {
 2133: 	    ret = xmlStrEqual(range1->blockName, range2->blockName);
 2134: 	} else {
 2135: 	    /*
 2136: 	     * comparing a block range with anything else is way
 2137: 	     * too costly, and maintining the table is like too much
 2138: 	     * memory too, so let's force the automata to save state
 2139: 	     * here.
 2140: 	     */
 2141: 	    return(1);
 2142: 	}
 2143:     } else if ((range1->type < XML_REGEXP_LETTER) ||
 2144:                (range2->type < XML_REGEXP_LETTER)) {
 2145: 	if ((range1->type == XML_REGEXP_ANYSPACE) &&
 2146: 	    (range2->type == XML_REGEXP_NOTSPACE))
 2147: 	    ret = 0;
 2148: 	else if ((range1->type == XML_REGEXP_INITNAME) &&
 2149: 	         (range2->type == XML_REGEXP_NOTINITNAME))
 2150: 	    ret = 0;
 2151: 	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
 2152: 	         (range2->type == XML_REGEXP_NOTNAMECHAR))
 2153: 	    ret = 0;
 2154: 	else if ((range1->type == XML_REGEXP_DECIMAL) &&
 2155: 	         (range2->type == XML_REGEXP_NOTDECIMAL))
 2156: 	    ret = 0;
 2157: 	else if ((range1->type == XML_REGEXP_REALCHAR) &&
 2158: 	         (range2->type == XML_REGEXP_NOTREALCHAR))
 2159: 	    ret = 0;
 2160: 	else {
 2161: 	    /* same thing to limit complexity */
 2162: 	    return(1);
 2163: 	}
 2164:     } else {
 2165:         ret = 0;
 2166:         /* range1->type < range2->type here */
 2167:         switch (range1->type) {
 2168: 	    case XML_REGEXP_LETTER:
 2169: 	         /* all disjoint except in the subgroups */
 2170: 	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
 2171: 		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
 2172: 		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
 2173: 		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
 2174: 		     (range2->type == XML_REGEXP_LETTER_OTHERS))
 2175: 		     ret = 1;
 2176: 		 break;
 2177: 	    case XML_REGEXP_MARK:
 2178: 	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
 2179: 		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
 2180: 		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
 2181: 		     ret = 1;
 2182: 		 break;
 2183: 	    case XML_REGEXP_NUMBER:
 2184: 	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
 2185: 		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
 2186: 		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
 2187: 		     ret = 1;
 2188: 		 break;
 2189: 	    case XML_REGEXP_PUNCT:
 2190: 	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
 2191: 		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
 2192: 		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
 2193: 		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
 2194: 		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
 2195: 		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
 2196: 		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
 2197: 		     ret = 1;
 2198: 		 break;
 2199: 	    case XML_REGEXP_SEPAR:
 2200: 	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
 2201: 		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
 2202: 		     (range2->type == XML_REGEXP_SEPAR_PARA))
 2203: 		     ret = 1;
 2204: 		 break;
 2205: 	    case XML_REGEXP_SYMBOL:
 2206: 	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
 2207: 		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
 2208: 		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
 2209: 		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
 2210: 		     ret = 1;
 2211: 		 break;
 2212: 	    case XML_REGEXP_OTHER:
 2213: 	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
 2214: 		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
 2215: 		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
 2216: 		     ret = 1;
 2217: 		 break;
 2218:             default:
 2219: 	         if ((range2->type >= XML_REGEXP_LETTER) &&
 2220: 		     (range2->type < XML_REGEXP_BLOCK_NAME))
 2221: 		     ret = 0;
 2222: 		 else {
 2223: 		     /* safety net ! */
 2224: 		     return(1);
 2225: 		 }
 2226: 	}
 2227:     }
 2228:     if (((range1->neg == 0) && (range2->neg != 0)) ||
 2229:         ((range1->neg != 0) && (range2->neg == 0)))
 2230: 	ret = !ret;
 2231:     return(ret);
 2232: }
 2233: 
 2234: /**
 2235:  * xmlFACompareAtomTypes:
 2236:  * @type1:  an atom type
 2237:  * @type2:  an atom type
 2238:  *
 2239:  * Compares two atoms type to check whether they intersect in some ways,
 2240:  * this is used by xmlFACompareAtoms only
 2241:  *
 2242:  * Returns 1 if they may intersect and 0 otherwise
 2243:  */
 2244: static int
 2245: xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
 2246:     if ((type1 == XML_REGEXP_EPSILON) ||
 2247:         (type1 == XML_REGEXP_CHARVAL) ||
 2248: 	(type1 == XML_REGEXP_RANGES) ||
 2249: 	(type1 == XML_REGEXP_SUBREG) ||
 2250: 	(type1 == XML_REGEXP_STRING) ||
 2251: 	(type1 == XML_REGEXP_ANYCHAR))
 2252: 	return(1);
 2253:     if ((type2 == XML_REGEXP_EPSILON) ||
 2254:         (type2 == XML_REGEXP_CHARVAL) ||
 2255: 	(type2 == XML_REGEXP_RANGES) ||
 2256: 	(type2 == XML_REGEXP_SUBREG) ||
 2257: 	(type2 == XML_REGEXP_STRING) ||
 2258: 	(type2 == XML_REGEXP_ANYCHAR))
 2259: 	return(1);
 2260: 
 2261:     if (type1 == type2) return(1);
 2262: 
 2263:     /* simplify subsequent compares by making sure type1 < type2 */
 2264:     if (type1 > type2) {
 2265:         xmlRegAtomType tmp = type1;
 2266: 	type1 = type2;
 2267: 	type2 = tmp;
 2268:     }
 2269:     switch (type1) {
 2270:         case XML_REGEXP_ANYSPACE: /* \s */
 2271: 	    /* can't be a letter, number, mark, pontuation, symbol */
 2272: 	    if ((type2 == XML_REGEXP_NOTSPACE) ||
 2273: 		((type2 >= XML_REGEXP_LETTER) &&
 2274: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2275: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2276: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2277: 	        ((type2 >= XML_REGEXP_MARK) &&
 2278: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2279: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2280: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2281: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2282: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
 2283: 	        ) return(0);
 2284: 	    break;
 2285:         case XML_REGEXP_NOTSPACE: /* \S */
 2286: 	    break;
 2287:         case XML_REGEXP_INITNAME: /* \l */
 2288: 	    /* can't be a number, mark, separator, pontuation, symbol or other */
 2289: 	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
 2290: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2291: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2292: 	        ((type2 >= XML_REGEXP_MARK) &&
 2293: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2294: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2295: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2296: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2297: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2298: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2299: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2300: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2301: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2302: 		) return(0);
 2303: 	    break;
 2304:         case XML_REGEXP_NOTINITNAME: /* \L */
 2305: 	    break;
 2306:         case XML_REGEXP_NAMECHAR: /* \c */
 2307: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2308: 	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
 2309: 	        ((type2 >= XML_REGEXP_MARK) &&
 2310: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2311: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2312: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2313: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2314: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2315: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2316: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2317: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2318: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2319: 		) return(0);
 2320: 	    break;
 2321:         case XML_REGEXP_NOTNAMECHAR: /* \C */
 2322: 	    break;
 2323:         case XML_REGEXP_DECIMAL: /* \d */
 2324: 	    /* can't be a letter, mark, separator, pontuation, symbol or other */
 2325: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2326: 	        (type2 == XML_REGEXP_REALCHAR) ||
 2327: 		((type2 >= XML_REGEXP_LETTER) &&
 2328: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2329: 	        ((type2 >= XML_REGEXP_MARK) &&
 2330: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2331: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2332: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2333: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2334: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2335: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2336: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2337: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2338: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2339: 		)return(0);
 2340: 	    break;
 2341:         case XML_REGEXP_NOTDECIMAL: /* \D */
 2342: 	    break;
 2343:         case XML_REGEXP_REALCHAR: /* \w */
 2344: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2345: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2346: 	        ((type2 >= XML_REGEXP_MARK) &&
 2347: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2348: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2349: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2350: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2351: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2352: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2353: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2354: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2355: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2356: 		)return(0);
 2357: 	    break;
 2358:         case XML_REGEXP_NOTREALCHAR: /* \W */
 2359: 	    break;
 2360: 	/*
 2361: 	 * at that point we know both type 1 and type2 are from
 2362: 	 * character categories are ordered and are different,
 2363: 	 * it becomes simple because this is a partition
 2364: 	 */
 2365:         case XML_REGEXP_LETTER:
 2366: 	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
 2367: 	        return(1);
 2368: 	    return(0);
 2369:         case XML_REGEXP_LETTER_UPPERCASE:
 2370:         case XML_REGEXP_LETTER_LOWERCASE:
 2371:         case XML_REGEXP_LETTER_TITLECASE:
 2372:         case XML_REGEXP_LETTER_MODIFIER:
 2373:         case XML_REGEXP_LETTER_OTHERS:
 2374: 	    return(0);
 2375:         case XML_REGEXP_MARK:
 2376: 	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
 2377: 	        return(1);
 2378: 	    return(0);
 2379:         case XML_REGEXP_MARK_NONSPACING:
 2380:         case XML_REGEXP_MARK_SPACECOMBINING:
 2381:         case XML_REGEXP_MARK_ENCLOSING:
 2382: 	    return(0);
 2383:         case XML_REGEXP_NUMBER:
 2384: 	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
 2385: 	        return(1);
 2386: 	    return(0);
 2387:         case XML_REGEXP_NUMBER_DECIMAL:
 2388:         case XML_REGEXP_NUMBER_LETTER:
 2389:         case XML_REGEXP_NUMBER_OTHERS:
 2390: 	    return(0);
 2391:         case XML_REGEXP_PUNCT:
 2392: 	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
 2393: 	        return(1);
 2394: 	    return(0);
 2395:         case XML_REGEXP_PUNCT_CONNECTOR:
 2396:         case XML_REGEXP_PUNCT_DASH:
 2397:         case XML_REGEXP_PUNCT_OPEN:
 2398:         case XML_REGEXP_PUNCT_CLOSE:
 2399:         case XML_REGEXP_PUNCT_INITQUOTE:
 2400:         case XML_REGEXP_PUNCT_FINQUOTE:
 2401:         case XML_REGEXP_PUNCT_OTHERS:
 2402: 	    return(0);
 2403:         case XML_REGEXP_SEPAR:
 2404: 	    if (type2 <= XML_REGEXP_SEPAR_PARA)
 2405: 	        return(1);
 2406: 	    return(0);
 2407:         case XML_REGEXP_SEPAR_SPACE:
 2408:         case XML_REGEXP_SEPAR_LINE:
 2409:         case XML_REGEXP_SEPAR_PARA:
 2410: 	    return(0);
 2411:         case XML_REGEXP_SYMBOL:
 2412: 	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
 2413: 	        return(1);
 2414: 	    return(0);
 2415:         case XML_REGEXP_SYMBOL_MATH:
 2416:         case XML_REGEXP_SYMBOL_CURRENCY:
 2417:         case XML_REGEXP_SYMBOL_MODIFIER:
 2418:         case XML_REGEXP_SYMBOL_OTHERS:
 2419: 	    return(0);
 2420:         case XML_REGEXP_OTHER:
 2421: 	    if (type2 <= XML_REGEXP_OTHER_NA)
 2422: 	        return(1);
 2423: 	    return(0);
 2424:         case XML_REGEXP_OTHER_CONTROL:
 2425:         case XML_REGEXP_OTHER_FORMAT:
 2426:         case XML_REGEXP_OTHER_PRIVATE:
 2427:         case XML_REGEXP_OTHER_NA:
 2428: 	    return(0);
 2429: 	default:
 2430: 	    break;
 2431:     }
 2432:     return(1);
 2433: }
 2434: 
 2435: /**
 2436:  * xmlFAEqualAtoms:
 2437:  * @atom1:  an atom
 2438:  * @atom2:  an atom
 2439:  * @deep: if not set only compare string pointers
 2440:  *
 2441:  * Compares two atoms to check whether they are the same exactly
 2442:  * this is used to remove equivalent transitions
 2443:  *
 2444:  * Returns 1 if same and 0 otherwise
 2445:  */
 2446: static int
 2447: xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2448:     int ret = 0;
 2449: 
 2450:     if (atom1 == atom2)
 2451: 	return(1);
 2452:     if ((atom1 == NULL) || (atom2 == NULL))
 2453: 	return(0);
 2454: 
 2455:     if (atom1->type != atom2->type)
 2456:         return(0);
 2457:     switch (atom1->type) {
 2458:         case XML_REGEXP_EPSILON:
 2459: 	    ret = 0;
 2460: 	    break;
 2461:         case XML_REGEXP_STRING:
 2462:             if (!deep)
 2463:                 ret = (atom1->valuep == atom2->valuep);
 2464:             else
 2465:                 ret = xmlStrEqual((xmlChar *)atom1->valuep,
 2466:                                   (xmlChar *)atom2->valuep);
 2467: 	    break;
 2468:         case XML_REGEXP_CHARVAL:
 2469: 	    ret = (atom1->codepoint == atom2->codepoint);
 2470: 	    break;
 2471: 	case XML_REGEXP_RANGES:
 2472: 	    /* too hard to do in the general case */
 2473: 	    ret = 0;
 2474: 	default:
 2475: 	    break;
 2476:     }
 2477:     return(ret);
 2478: }
 2479: 
 2480: /**
 2481:  * xmlFACompareAtoms:
 2482:  * @atom1:  an atom
 2483:  * @atom2:  an atom
 2484:  * @deep: if not set only compare string pointers
 2485:  *
 2486:  * Compares two atoms to check whether they intersect in some ways,
 2487:  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
 2488:  *
 2489:  * Returns 1 if yes and 0 otherwise
 2490:  */
 2491: static int
 2492: xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2493:     int ret = 1;
 2494: 
 2495:     if (atom1 == atom2)
 2496: 	return(1);
 2497:     if ((atom1 == NULL) || (atom2 == NULL))
 2498: 	return(0);
 2499: 
 2500:     if ((atom1->type == XML_REGEXP_ANYCHAR) ||
 2501:         (atom2->type == XML_REGEXP_ANYCHAR))
 2502: 	return(1);
 2503: 
 2504:     if (atom1->type > atom2->type) {
 2505: 	xmlRegAtomPtr tmp;
 2506: 	tmp = atom1;
 2507: 	atom1 = atom2;
 2508: 	atom2 = tmp;
 2509:     }
 2510:     if (atom1->type != atom2->type) {
 2511:         ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
 2512: 	/* if they can't intersect at the type level break now */
 2513: 	if (ret == 0)
 2514: 	    return(0);
 2515:     }
 2516:     switch (atom1->type) {
 2517:         case XML_REGEXP_STRING:
 2518:             if (!deep)
 2519:                 ret = (atom1->valuep != atom2->valuep);
 2520:             else
 2521:                 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
 2522:                                              (xmlChar *)atom2->valuep);
 2523: 	    break;
 2524:         case XML_REGEXP_EPSILON:
 2525: 	    goto not_determinist;
 2526:         case XML_REGEXP_CHARVAL:
 2527: 	    if (atom2->type == XML_REGEXP_CHARVAL) {
 2528: 		ret = (atom1->codepoint == atom2->codepoint);
 2529: 	    } else {
 2530: 	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
 2531: 		if (ret < 0)
 2532: 		    ret = 1;
 2533: 	    }
 2534: 	    break;
 2535:         case XML_REGEXP_RANGES:
 2536: 	    if (atom2->type == XML_REGEXP_RANGES) {
 2537: 	        int i, j, res;
 2538: 		xmlRegRangePtr r1, r2;
 2539: 
 2540: 		/*
 2541: 		 * need to check that none of the ranges eventually matches
 2542: 		 */
 2543: 		for (i = 0;i < atom1->nbRanges;i++) {
 2544: 		    for (j = 0;j < atom2->nbRanges;j++) {
 2545: 			r1 = atom1->ranges[i];
 2546: 			r2 = atom2->ranges[j];
 2547: 			res = xmlFACompareRanges(r1, r2);
 2548: 			if (res == 1) {
 2549: 			    ret = 1;
 2550: 			    goto done;
 2551: 			}
 2552: 		    }
 2553: 		}
 2554: 		ret = 0;
 2555: 	    }
 2556: 	    break;
 2557: 	default:
 2558: 	    goto not_determinist;
 2559:     }
 2560: done:
 2561:     if (atom1->neg != atom2->neg) {
 2562:         ret = !ret;
 2563:     }
 2564:     if (ret == 0)
 2565:         return(0);
 2566: not_determinist:
 2567:     return(1);
 2568: }
 2569: 
 2570: /**
 2571:  * xmlFARecurseDeterminism:
 2572:  * @ctxt:  a regexp parser context
 2573:  *
 2574:  * Check whether the associated regexp is determinist,
 2575:  * should be called after xmlFAEliminateEpsilonTransitions()
 2576:  *
 2577:  */
 2578: static int
 2579: xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 2580: 	                 int to, xmlRegAtomPtr atom) {
 2581:     int ret = 1;
 2582:     int res;
 2583:     int transnr, nbTrans;
 2584:     xmlRegTransPtr t1;
 2585:     int deep = 1;
 2586: 
 2587:     if (state == NULL)
 2588: 	return(ret);
 2589: 
 2590:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2591:         deep = 0;
 2592: 
 2593:     /*
 2594:      * don't recurse on transitions potentially added in the course of
 2595:      * the elimination.
 2596:      */
 2597:     nbTrans = state->nbTrans;
 2598:     for (transnr = 0;transnr < nbTrans;transnr++) {
 2599: 	t1 = &(state->trans[transnr]);
 2600: 	/*
 2601: 	 * check transitions conflicting with the one looked at
 2602: 	 */
 2603: 	if (t1->atom == NULL) {
 2604: 	    if (t1->to < 0)
 2605: 		continue;
 2606: 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2607: 		                           to, atom);
 2608: 	    if (res == 0) {
 2609: 	        ret = 0;
 2610: 		/* t1->nd = 1; */
 2611: 	    }
 2612: 	    continue;
 2613: 	}
 2614: 	if (t1->to != to)
 2615: 	    continue;
 2616: 	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
 2617: 	    ret = 0;
 2618: 	    /* mark the transition as non-deterministic */
 2619: 	    t1->nd = 1;
 2620: 	}
 2621:     }
 2622:     return(ret);
 2623: }
 2624: 
 2625: /**
 2626:  * xmlFAComputesDeterminism:
 2627:  * @ctxt:  a regexp parser context
 2628:  *
 2629:  * Check whether the associated regexp is determinist,
 2630:  * should be called after xmlFAEliminateEpsilonTransitions()
 2631:  *
 2632:  */
 2633: static int
 2634: xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 2635:     int statenr, transnr;
 2636:     xmlRegStatePtr state;
 2637:     xmlRegTransPtr t1, t2, last;
 2638:     int i;
 2639:     int ret = 1;
 2640:     int deep = 1;
 2641: 
 2642: #ifdef DEBUG_REGEXP_GRAPH
 2643:     printf("xmlFAComputesDeterminism\n");
 2644:     xmlRegPrintCtxt(stdout, ctxt);
 2645: #endif
 2646:     if (ctxt->determinist != -1)
 2647: 	return(ctxt->determinist);
 2648: 
 2649:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2650:         deep = 0;
 2651: 
 2652:     /*
 2653:      * First cleanup the automata removing cancelled transitions
 2654:      */
 2655:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2656: 	state = ctxt->states[statenr];
 2657: 	if (state == NULL)
 2658: 	    continue;
 2659: 	if (state->nbTrans < 2)
 2660: 	    continue;
 2661: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2662: 	    t1 = &(state->trans[transnr]);
 2663: 	    /*
 2664: 	     * Determinism checks in case of counted or all transitions
 2665: 	     * will have to be handled separately
 2666: 	     */
 2667: 	    if (t1->atom == NULL) {
 2668: 		/* t1->nd = 1; */
 2669: 		continue;
 2670: 	    }
 2671: 	    if (t1->to == -1) /* eliminated */
 2672: 		continue;
 2673: 	    for (i = 0;i < transnr;i++) {
 2674: 		t2 = &(state->trans[i]);
 2675: 		if (t2->to == -1) /* eliminated */
 2676: 		    continue;
 2677: 		if (t2->atom != NULL) {
 2678: 		    if (t1->to == t2->to) {
 2679:                         /*
 2680:                          * Here we use deep because we want to keep the
 2681:                          * transitions which indicate a conflict
 2682:                          */
 2683: 			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
 2684:                             (t1->counter == t2->counter) &&
 2685:                             (t1->count == t2->count))
 2686: 			    t2->to = -1; /* eliminated */
 2687: 		    }
 2688: 		}
 2689: 	    }
 2690: 	}
 2691:     }
 2692: 
 2693:     /*
 2694:      * Check for all states that there aren't 2 transitions
 2695:      * with the same atom and a different target.
 2696:      */
 2697:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2698: 	state = ctxt->states[statenr];
 2699: 	if (state == NULL)
 2700: 	    continue;
 2701: 	if (state->nbTrans < 2)
 2702: 	    continue;
 2703: 	last = NULL;
 2704: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2705: 	    t1 = &(state->trans[transnr]);
 2706: 	    /*
 2707: 	     * Determinism checks in case of counted or all transitions
 2708: 	     * will have to be handled separately
 2709: 	     */
 2710: 	    if (t1->atom == NULL) {
 2711: 		continue;
 2712: 	    }
 2713: 	    if (t1->to == -1) /* eliminated */
 2714: 		continue;
 2715: 	    for (i = 0;i < transnr;i++) {
 2716: 		t2 = &(state->trans[i]);
 2717: 		if (t2->to == -1) /* eliminated */
 2718: 		    continue;
 2719: 		if (t2->atom != NULL) {
 2720:                     /*
 2721:                      * But here we don't use deep because we want to
 2722:                      * find transitions which indicate a conflict
 2723:                      */
 2724: 		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
 2725: 			ret = 0;
 2726: 			/* mark the transitions as non-deterministic ones */
 2727: 			t1->nd = 1;
 2728: 			t2->nd = 1;
 2729: 			last = t1;
 2730: 		    }
 2731: 		} else if (t1->to != -1) {
 2732: 		    /*
 2733: 		     * do the closure in case of remaining specific
 2734: 		     * epsilon transitions like choices or all
 2735: 		     */
 2736: 		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2737: 						   t2->to, t2->atom);
 2738: 		    /* don't shortcut the computation so all non deterministic
 2739: 		       transition get marked down
 2740: 		    if (ret == 0)
 2741: 			return(0);
 2742: 		     */
 2743: 		    if (ret == 0) {
 2744: 			t1->nd = 1;
 2745: 			/* t2->nd = 1; */
 2746: 			last = t1;
 2747: 		    }
 2748: 		}
 2749: 	    }
 2750: 	    /* don't shortcut the computation so all non deterministic
 2751: 	       transition get marked down
 2752: 	    if (ret == 0)
 2753: 		break; */
 2754: 	}
 2755: 
 2756: 	/*
 2757: 	 * mark specifically the last non-deterministic transition
 2758: 	 * from a state since there is no need to set-up rollback
 2759: 	 * from it
 2760: 	 */
 2761: 	if (last != NULL) {
 2762: 	    last->nd = 2;
 2763: 	}
 2764: 
 2765: 	/* don't shortcut the computation so all non deterministic
 2766: 	   transition get marked down
 2767: 	if (ret == 0)
 2768: 	    break; */
 2769:     }
 2770: 
 2771:     ctxt->determinist = ret;
 2772:     return(ret);
 2773: }
 2774: 
 2775: /************************************************************************
 2776:  * 									*
 2777:  *	Routines to check input against transition atoms		*
 2778:  * 									*
 2779:  ************************************************************************/
 2780: 
 2781: static int
 2782: xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
 2783: 	                  int start, int end, const xmlChar *blockName) {
 2784:     int ret = 0;
 2785: 
 2786:     switch (type) {
 2787:         case XML_REGEXP_STRING:
 2788:         case XML_REGEXP_SUBREG:
 2789:         case XML_REGEXP_RANGES:
 2790:         case XML_REGEXP_EPSILON:
 2791: 	    return(-1);
 2792:         case XML_REGEXP_ANYCHAR:
 2793: 	    ret = ((codepoint != '\n') && (codepoint != '\r'));
 2794: 	    break;
 2795:         case XML_REGEXP_CHARVAL:
 2796: 	    ret = ((codepoint >= start) && (codepoint <= end));
 2797: 	    break;
 2798:         case XML_REGEXP_NOTSPACE:
 2799: 	    neg = !neg;
 2800:         case XML_REGEXP_ANYSPACE:
 2801: 	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
 2802: 		   (codepoint == '\t') || (codepoint == ' '));
 2803: 	    break;
 2804:         case XML_REGEXP_NOTINITNAME:
 2805: 	    neg = !neg;
 2806:         case XML_REGEXP_INITNAME:
 2807: 	    ret = (IS_LETTER(codepoint) || 
 2808: 		   (codepoint == '_') || (codepoint == ':'));
 2809: 	    break;
 2810:         case XML_REGEXP_NOTNAMECHAR:
 2811: 	    neg = !neg;
 2812:         case XML_REGEXP_NAMECHAR:
 2813: 	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
 2814: 		   (codepoint == '.') || (codepoint == '-') ||
 2815: 		   (codepoint == '_') || (codepoint == ':') ||
 2816: 		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
 2817: 	    break;
 2818:         case XML_REGEXP_NOTDECIMAL:
 2819: 	    neg = !neg;
 2820:         case XML_REGEXP_DECIMAL:
 2821: 	    ret = xmlUCSIsCatNd(codepoint);
 2822: 	    break;
 2823:         case XML_REGEXP_REALCHAR:
 2824: 	    neg = !neg;
 2825:         case XML_REGEXP_NOTREALCHAR:
 2826: 	    ret = xmlUCSIsCatP(codepoint);
 2827: 	    if (ret == 0)
 2828: 		ret = xmlUCSIsCatZ(codepoint);
 2829: 	    if (ret == 0)
 2830: 		ret = xmlUCSIsCatC(codepoint);
 2831: 	    break;
 2832:         case XML_REGEXP_LETTER:
 2833: 	    ret = xmlUCSIsCatL(codepoint);
 2834: 	    break;
 2835:         case XML_REGEXP_LETTER_UPPERCASE:
 2836: 	    ret = xmlUCSIsCatLu(codepoint);
 2837: 	    break;
 2838:         case XML_REGEXP_LETTER_LOWERCASE:
 2839: 	    ret = xmlUCSIsCatLl(codepoint);
 2840: 	    break;
 2841:         case XML_REGEXP_LETTER_TITLECASE:
 2842: 	    ret = xmlUCSIsCatLt(codepoint);
 2843: 	    break;
 2844:         case XML_REGEXP_LETTER_MODIFIER:
 2845: 	    ret = xmlUCSIsCatLm(codepoint);
 2846: 	    break;
 2847:         case XML_REGEXP_LETTER_OTHERS:
 2848: 	    ret = xmlUCSIsCatLo(codepoint);
 2849: 	    break;
 2850:         case XML_REGEXP_MARK:
 2851: 	    ret = xmlUCSIsCatM(codepoint);
 2852: 	    break;
 2853:         case XML_REGEXP_MARK_NONSPACING:
 2854: 	    ret = xmlUCSIsCatMn(codepoint);
 2855: 	    break;
 2856:         case XML_REGEXP_MARK_SPACECOMBINING:
 2857: 	    ret = xmlUCSIsCatMc(codepoint);
 2858: 	    break;
 2859:         case XML_REGEXP_MARK_ENCLOSING:
 2860: 	    ret = xmlUCSIsCatMe(codepoint);
 2861: 	    break;
 2862:         case XML_REGEXP_NUMBER:
 2863: 	    ret = xmlUCSIsCatN(codepoint);
 2864: 	    break;
 2865:         case XML_REGEXP_NUMBER_DECIMAL:
 2866: 	    ret = xmlUCSIsCatNd(codepoint);
 2867: 	    break;
 2868:         case XML_REGEXP_NUMBER_LETTER:
 2869: 	    ret = xmlUCSIsCatNl(codepoint);
 2870: 	    break;
 2871:         case XML_REGEXP_NUMBER_OTHERS:
 2872: 	    ret = xmlUCSIsCatNo(codepoint);
 2873: 	    break;
 2874:         case XML_REGEXP_PUNCT:
 2875: 	    ret = xmlUCSIsCatP(codepoint);
 2876: 	    break;
 2877:         case XML_REGEXP_PUNCT_CONNECTOR:
 2878: 	    ret = xmlUCSIsCatPc(codepoint);
 2879: 	    break;
 2880:         case XML_REGEXP_PUNCT_DASH:
 2881: 	    ret = xmlUCSIsCatPd(codepoint);
 2882: 	    break;
 2883:         case XML_REGEXP_PUNCT_OPEN:
 2884: 	    ret = xmlUCSIsCatPs(codepoint);
 2885: 	    break;
 2886:         case XML_REGEXP_PUNCT_CLOSE:
 2887: 	    ret = xmlUCSIsCatPe(codepoint);
 2888: 	    break;
 2889:         case XML_REGEXP_PUNCT_INITQUOTE:
 2890: 	    ret = xmlUCSIsCatPi(codepoint);
 2891: 	    break;
 2892:         case XML_REGEXP_PUNCT_FINQUOTE:
 2893: 	    ret = xmlUCSIsCatPf(codepoint);
 2894: 	    break;
 2895:         case XML_REGEXP_PUNCT_OTHERS:
 2896: 	    ret = xmlUCSIsCatPo(codepoint);
 2897: 	    break;
 2898:         case XML_REGEXP_SEPAR:
 2899: 	    ret = xmlUCSIsCatZ(codepoint);
 2900: 	    break;
 2901:         case XML_REGEXP_SEPAR_SPACE:
 2902: 	    ret = xmlUCSIsCatZs(codepoint);
 2903: 	    break;
 2904:         case XML_REGEXP_SEPAR_LINE:
 2905: 	    ret = xmlUCSIsCatZl(codepoint);
 2906: 	    break;
 2907:         case XML_REGEXP_SEPAR_PARA:
 2908: 	    ret = xmlUCSIsCatZp(codepoint);
 2909: 	    break;
 2910:         case XML_REGEXP_SYMBOL:
 2911: 	    ret = xmlUCSIsCatS(codepoint);
 2912: 	    break;
 2913:         case XML_REGEXP_SYMBOL_MATH:
 2914: 	    ret = xmlUCSIsCatSm(codepoint);
 2915: 	    break;
 2916:         case XML_REGEXP_SYMBOL_CURRENCY:
 2917: 	    ret = xmlUCSIsCatSc(codepoint);
 2918: 	    break;
 2919:         case XML_REGEXP_SYMBOL_MODIFIER:
 2920: 	    ret = xmlUCSIsCatSk(codepoint);
 2921: 	    break;
 2922:         case XML_REGEXP_SYMBOL_OTHERS:
 2923: 	    ret = xmlUCSIsCatSo(codepoint);
 2924: 	    break;
 2925:         case XML_REGEXP_OTHER:
 2926: 	    ret = xmlUCSIsCatC(codepoint);
 2927: 	    break;
 2928:         case XML_REGEXP_OTHER_CONTROL:
 2929: 	    ret = xmlUCSIsCatCc(codepoint);
 2930: 	    break;
 2931:         case XML_REGEXP_OTHER_FORMAT:
 2932: 	    ret = xmlUCSIsCatCf(codepoint);
 2933: 	    break;
 2934:         case XML_REGEXP_OTHER_PRIVATE:
 2935: 	    ret = xmlUCSIsCatCo(codepoint);
 2936: 	    break;
 2937:         case XML_REGEXP_OTHER_NA:
 2938: 	    /* ret = xmlUCSIsCatCn(codepoint); */
 2939: 	    /* Seems it doesn't exist anymore in recent Unicode releases */
 2940: 	    ret = 0;
 2941: 	    break;
 2942:         case XML_REGEXP_BLOCK_NAME:
 2943: 	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
 2944: 	    break;
 2945:     }
 2946:     if (neg)
 2947: 	return(!ret);
 2948:     return(ret);
 2949: }
 2950: 
 2951: static int
 2952: xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
 2953:     int i, ret = 0;
 2954:     xmlRegRangePtr range;
 2955: 
 2956:     if ((atom == NULL) || (!IS_CHAR(codepoint)))
 2957: 	return(-1);
 2958: 
 2959:     switch (atom->type) {
 2960:         case XML_REGEXP_SUBREG:
 2961:         case XML_REGEXP_EPSILON:
 2962: 	    return(-1);
 2963:         case XML_REGEXP_CHARVAL:
 2964:             return(codepoint == atom->codepoint);
 2965:         case XML_REGEXP_RANGES: {
 2966: 	    int accept = 0;
 2967: 
 2968: 	    for (i = 0;i < atom->nbRanges;i++) {
 2969: 		range = atom->ranges[i];
 2970: 		if (range->neg == 2) {
 2971: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2972: 						0, range->start, range->end,
 2973: 						range->blockName);
 2974: 		    if (ret != 0)
 2975: 			return(0); /* excluded char */
 2976: 		} else if (range->neg) {
 2977: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2978: 						0, range->start, range->end,
 2979: 						range->blockName);
 2980: 		    if (ret == 0)
 2981: 		        accept = 1;
 2982: 		    else
 2983: 		        return(0);
 2984: 		} else {
 2985: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2986: 						0, range->start, range->end,
 2987: 						range->blockName);
 2988: 		    if (ret != 0)
 2989: 			accept = 1; /* might still be excluded */
 2990: 		}
 2991: 	    }
 2992: 	    return(accept);
 2993: 	}
 2994:         case XML_REGEXP_STRING:
 2995: 	    printf("TODO: XML_REGEXP_STRING\n");
 2996: 	    return(-1);
 2997:         case XML_REGEXP_ANYCHAR:
 2998:         case XML_REGEXP_ANYSPACE:
 2999:         case XML_REGEXP_NOTSPACE:
 3000:         case XML_REGEXP_INITNAME:
 3001:         case XML_REGEXP_NOTINITNAME:
 3002:         case XML_REGEXP_NAMECHAR:
 3003:         case XML_REGEXP_NOTNAMECHAR:
 3004:         case XML_REGEXP_DECIMAL:
 3005:         case XML_REGEXP_NOTDECIMAL:
 3006:         case XML_REGEXP_REALCHAR:
 3007:         case XML_REGEXP_NOTREALCHAR:
 3008:         case XML_REGEXP_LETTER:
 3009:         case XML_REGEXP_LETTER_UPPERCASE:
 3010:         case XML_REGEXP_LETTER_LOWERCASE:
 3011:         case XML_REGEXP_LETTER_TITLECASE:
 3012:         case XML_REGEXP_LETTER_MODIFIER:
 3013:         case XML_REGEXP_LETTER_OTHERS:
 3014:         case XML_REGEXP_MARK:
 3015:         case XML_REGEXP_MARK_NONSPACING:
 3016:         case XML_REGEXP_MARK_SPACECOMBINING:
 3017:         case XML_REGEXP_MARK_ENCLOSING:
 3018:         case XML_REGEXP_NUMBER:
 3019:         case XML_REGEXP_NUMBER_DECIMAL:
 3020:         case XML_REGEXP_NUMBER_LETTER:
 3021:         case XML_REGEXP_NUMBER_OTHERS:
 3022:         case XML_REGEXP_PUNCT:
 3023:         case XML_REGEXP_PUNCT_CONNECTOR:
 3024:         case XML_REGEXP_PUNCT_DASH:
 3025:         case XML_REGEXP_PUNCT_OPEN:
 3026:         case XML_REGEXP_PUNCT_CLOSE:
 3027:         case XML_REGEXP_PUNCT_INITQUOTE:
 3028:         case XML_REGEXP_PUNCT_FINQUOTE:
 3029:         case XML_REGEXP_PUNCT_OTHERS:
 3030:         case XML_REGEXP_SEPAR:
 3031:         case XML_REGEXP_SEPAR_SPACE:
 3032:         case XML_REGEXP_SEPAR_LINE:
 3033:         case XML_REGEXP_SEPAR_PARA:
 3034:         case XML_REGEXP_SYMBOL:
 3035:         case XML_REGEXP_SYMBOL_MATH:
 3036:         case XML_REGEXP_SYMBOL_CURRENCY:
 3037:         case XML_REGEXP_SYMBOL_MODIFIER:
 3038:         case XML_REGEXP_SYMBOL_OTHERS:
 3039:         case XML_REGEXP_OTHER:
 3040:         case XML_REGEXP_OTHER_CONTROL:
 3041:         case XML_REGEXP_OTHER_FORMAT:
 3042:         case XML_REGEXP_OTHER_PRIVATE:
 3043:         case XML_REGEXP_OTHER_NA:
 3044: 	case XML_REGEXP_BLOCK_NAME:
 3045: 	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
 3046: 		                            (const xmlChar *)atom->valuep);
 3047: 	    if (atom->neg)
 3048: 		ret = !ret;
 3049: 	    break;
 3050:     }
 3051:     return(ret);
 3052: }
 3053: 
 3054: /************************************************************************
 3055:  * 									*
 3056:  *	Saving and restoring state of an execution context		*
 3057:  * 									*
 3058:  ************************************************************************/
 3059: 
 3060: #ifdef DEBUG_REGEXP_EXEC
 3061: static void
 3062: xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
 3063:     printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
 3064:     if (exec->inputStack != NULL) {
 3065: 	int i;
 3066: 	printf(": ");
 3067: 	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
 3068: 	    printf("%s ", (const char *)
 3069: 	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
 3070:     } else {
 3071: 	printf(": %s", &(exec->inputString[exec->index]));
 3072:     }
 3073:     printf("\n");
 3074: }
 3075: #endif
 3076: 
 3077: static void
 3078: xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
 3079: #ifdef DEBUG_REGEXP_EXEC
 3080:     printf("saving ");
 3081:     exec->transno++;
 3082:     xmlFARegDebugExec(exec);
 3083:     exec->transno--;
 3084: #endif
 3085: #ifdef MAX_PUSH
 3086:     if (exec->nbPush > MAX_PUSH) {
 3087:         return;
 3088:     }
 3089:     exec->nbPush++;
 3090: #endif
 3091: 
 3092:     if (exec->maxRollbacks == 0) {
 3093: 	exec->maxRollbacks = 4;
 3094: 	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
 3095: 		                             sizeof(xmlRegExecRollback));
 3096: 	if (exec->rollbacks == NULL) {
 3097: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3098: 	    exec->maxRollbacks = 0;
 3099: 	    return;
 3100: 	}
 3101: 	memset(exec->rollbacks, 0,
 3102: 	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3103:     } else if (exec->nbRollbacks >= exec->maxRollbacks) {
 3104: 	xmlRegExecRollback *tmp;
 3105: 	int len = exec->maxRollbacks;
 3106: 
 3107: 	exec->maxRollbacks *= 2;
 3108: 	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
 3109: 			exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3110: 	if (tmp == NULL) {
 3111: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3112: 	    exec->maxRollbacks /= 2;
 3113: 	    return;
 3114: 	}
 3115: 	exec->rollbacks = tmp;
 3116: 	tmp = &exec->rollbacks[len];
 3117: 	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
 3118:     }
 3119:     exec->rollbacks[exec->nbRollbacks].state = exec->state;
 3120:     exec->rollbacks[exec->nbRollbacks].index = exec->index;
 3121:     exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
 3122:     if (exec->comp->nbCounters > 0) {
 3123: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3124: 	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
 3125: 		xmlMalloc(exec->comp->nbCounters * sizeof(int));
 3126: 	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3127: 		xmlRegexpErrMemory(NULL, "saving regexp");
 3128: 		exec->status = -5;
 3129: 		return;
 3130: 	    }
 3131: 	}
 3132: 	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
 3133: 	       exec->comp->nbCounters * sizeof(int));
 3134:     }
 3135:     exec->nbRollbacks++;
 3136: }
 3137: 
 3138: static void
 3139: xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
 3140:     if (exec->nbRollbacks <= 0) {
 3141: 	exec->status = -1;
 3142: #ifdef DEBUG_REGEXP_EXEC
 3143: 	printf("rollback failed on empty stack\n");
 3144: #endif
 3145: 	return;
 3146:     }
 3147:     exec->nbRollbacks--;
 3148:     exec->state = exec->rollbacks[exec->nbRollbacks].state;
 3149:     exec->index = exec->rollbacks[exec->nbRollbacks].index;
 3150:     exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
 3151:     if (exec->comp->nbCounters > 0) {
 3152: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3153: 	    fprintf(stderr, "exec save: allocation failed");
 3154: 	    exec->status = -6;
 3155: 	    return;
 3156: 	}
 3157: 	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
 3158: 	       exec->comp->nbCounters * sizeof(int));
 3159:     }
 3160: 
 3161: #ifdef DEBUG_REGEXP_EXEC
 3162:     printf("restored ");
 3163:     xmlFARegDebugExec(exec);
 3164: #endif
 3165: }
 3166: 
 3167: /************************************************************************
 3168:  * 									*
 3169:  *	Verifier, running an input against a compiled regexp		*
 3170:  * 									*
 3171:  ************************************************************************/
 3172: 
 3173: static int
 3174: xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
 3175:     xmlRegExecCtxt execval;
 3176:     xmlRegExecCtxtPtr exec = &execval;
 3177:     int ret, codepoint = 0, len, deter;
 3178: 
 3179:     exec->inputString = content;
 3180:     exec->index = 0;
 3181:     exec->nbPush = 0;
 3182:     exec->determinist = 1;
 3183:     exec->maxRollbacks = 0;
 3184:     exec->nbRollbacks = 0;
 3185:     exec->rollbacks = NULL;
 3186:     exec->status = 0;
 3187:     exec->comp = comp;
 3188:     exec->state = comp->states[0];
 3189:     exec->transno = 0;
 3190:     exec->transcount = 0;
 3191:     exec->inputStack = NULL;
 3192:     exec->inputStackMax = 0;
 3193:     if (comp->nbCounters > 0) {
 3194: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
 3195: 	if (exec->counts == NULL) {
 3196: 	    xmlRegexpErrMemory(NULL, "running regexp");
 3197: 	    return(-1);
 3198: 	}
 3199:         memset(exec->counts, 0, comp->nbCounters * sizeof(int));
 3200:     } else
 3201: 	exec->counts = NULL;
 3202:     while ((exec->status == 0) &&
 3203: 	   ((exec->inputString[exec->index] != 0) ||
 3204: 	    ((exec->state != NULL) &&
 3205: 	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 3206: 	xmlRegTransPtr trans;
 3207: 	xmlRegAtomPtr atom;
 3208: 
 3209: 	/*
 3210: 	 * If end of input on non-terminal state, rollback, however we may
 3211: 	 * still have epsilon like transition for counted transitions
 3212: 	 * on counters, in that case don't break too early.  Additionally,
 3213: 	 * if we are working on a range like "AB{0,2}", where B is not present,
 3214: 	 * we don't want to break.
 3215: 	 */
 3216: 	len = 1;
 3217: 	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
 3218: 	    /*
 3219: 	     * if there is a transition, we must check if
 3220: 	     *  atom allows minOccurs of 0
 3221: 	     */
 3222: 	    if (exec->transno < exec->state->nbTrans) {
 3223: 	        trans = &exec->state->trans[exec->transno];
 3224: 		if (trans->to >=0) {
 3225: 		    atom = trans->atom;
 3226: 		    if (!((atom->min == 0) && (atom->max > 0)))
 3227: 		        goto rollback;
 3228: 		}
 3229: 	    } else
 3230: 	        goto rollback;
 3231: 	}
 3232: 
 3233: 	exec->transcount = 0;
 3234: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 3235: 	    trans = &exec->state->trans[exec->transno];
 3236: 	    if (trans->to < 0)
 3237: 		continue;
 3238: 	    atom = trans->atom;
 3239: 	    ret = 0;
 3240: 	    deter = 1;
 3241: 	    if (trans->count >= 0) {
 3242: 		int count;
 3243: 		xmlRegCounterPtr counter;
 3244: 
 3245: 		if (exec->counts == NULL) {
 3246: 		    exec->status = -1;
 3247: 		    goto error;
 3248: 		}
 3249: 		/*
 3250: 		 * A counted transition.
 3251: 		 */
 3252: 
 3253: 		count = exec->counts[trans->count];
 3254: 		counter = &exec->comp->counters[trans->count];
 3255: #ifdef DEBUG_REGEXP_EXEC
 3256: 		printf("testing count %d: val %d, min %d, max %d\n",
 3257: 		       trans->count, count, counter->min,  counter->max);
 3258: #endif
 3259: 		ret = ((count >= counter->min) && (count <= counter->max));
 3260: 		if ((ret) && (counter->min != counter->max))
 3261: 		    deter = 0;
 3262: 	    } else if (atom == NULL) {
 3263: 		fprintf(stderr, "epsilon transition left at runtime\n");
 3264: 		exec->status = -2;
 3265: 		break;
 3266: 	    } else if (exec->inputString[exec->index] != 0) {
 3267:                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
 3268: 		ret = xmlRegCheckCharacter(atom, codepoint);
 3269: 		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
 3270: 		    xmlRegStatePtr to = comp->states[trans->to];
 3271: 
 3272: 		    /*
 3273: 		     * this is a multiple input sequence
 3274: 		     * If there is a counter associated increment it now.
 3275: 		     * before potentially saving and rollback
 3276: 		     * do not increment if the counter is already over the
 3277: 		     * maximum limit in which case get to next transition
 3278: 		     */
 3279: 		    if (trans->counter >= 0) {
 3280: 			xmlRegCounterPtr counter;
 3281: 
 3282: 			if ((exec->counts == NULL) ||
 3283: 			    (exec->comp == NULL) ||
 3284: 			    (exec->comp->counters == NULL)) {
 3285: 			    exec->status = -1;
 3286: 			    goto error;
 3287: 			}
 3288: 			counter = &exec->comp->counters[trans->counter];
 3289: 			if (exec->counts[trans->counter] >= counter->max)
 3290: 			    continue; /* for loop on transitions */
 3291: 
 3292: #ifdef DEBUG_REGEXP_EXEC
 3293: 			printf("Increasing count %d\n", trans->counter);
 3294: #endif
 3295: 			exec->counts[trans->counter]++;
 3296: 		    }
 3297: 		    if (exec->state->nbTrans > exec->transno + 1) {
 3298: 			xmlFARegExecSave(exec);
 3299: 		    }
 3300: 		    exec->transcount = 1;
 3301: 		    do {
 3302: 			/*
 3303: 			 * Try to progress as much as possible on the input
 3304: 			 */
 3305: 			if (exec->transcount == atom->max) {
 3306: 			    break;
 3307: 			}
 3308: 			exec->index += len;
 3309: 			/*
 3310: 			 * End of input: stop here
 3311: 			 */
 3312: 			if (exec->inputString[exec->index] == 0) {
 3313: 			    exec->index -= len;
 3314: 			    break;
 3315: 			}
 3316: 			if (exec->transcount >= atom->min) {
 3317: 			    int transno = exec->transno;
 3318: 			    xmlRegStatePtr state = exec->state;
 3319: 
 3320: 			    /*
 3321: 			     * The transition is acceptable save it
 3322: 			     */
 3323: 			    exec->transno = -1; /* trick */
 3324: 			    exec->state = to;
 3325: 			    xmlFARegExecSave(exec);
 3326: 			    exec->transno = transno;
 3327: 			    exec->state = state;
 3328: 			}
 3329: 			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
 3330: 				              len);
 3331: 			ret = xmlRegCheckCharacter(atom, codepoint);
 3332: 			exec->transcount++;
 3333: 		    } while (ret == 1);
 3334: 		    if (exec->transcount < atom->min)
 3335: 			ret = 0;
 3336: 
 3337: 		    /*
 3338: 		     * If the last check failed but one transition was found
 3339: 		     * possible, rollback
 3340: 		     */
 3341: 		    if (ret < 0)
 3342: 			ret = 0;
 3343: 		    if (ret == 0) {
 3344: 			goto rollback;
 3345: 		    }
 3346: 		    if (trans->counter >= 0) {
 3347: 			if (exec->counts == NULL) {
 3348: 			    exec->status = -1;
 3349: 			    goto error;
 3350: 			}
 3351: #ifdef DEBUG_REGEXP_EXEC
 3352: 			printf("Decreasing count %d\n", trans->counter);
 3353: #endif
 3354: 			exec->counts[trans->counter]--;
 3355: 		    }
 3356: 		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
 3357: 		    /*
 3358: 		     * we don't match on the codepoint, but minOccurs of 0
 3359: 		     * says that's ok.  Setting len to 0 inhibits stepping
 3360: 		     * over the codepoint.
 3361: 		     */
 3362: 		    exec->transcount = 1;
 3363: 		    len = 0;
 3364: 		    ret = 1;
 3365: 		}
 3366: 	    } else if ((atom->min == 0) && (atom->max > 0)) {
 3367: 	        /* another spot to match when minOccurs is 0 */
 3368: 		exec->transcount = 1;
 3369: 		len = 0;
 3370: 		ret = 1;
 3371: 	    }
 3372: 	    if (ret == 1) {
 3373: 		if ((trans->nd == 1) ||
 3374: 		    ((trans->count >= 0) && (deter == 0) &&
 3375: 		     (exec->state->nbTrans > exec->transno + 1))) {
 3376: #ifdef DEBUG_REGEXP_EXEC
 3377: 		    if (trans->nd == 1)
 3378: 		        printf("Saving on nd transition atom %d for %c at %d\n",
 3379: 			       trans->atom->no, codepoint, exec->index);
 3380: 		    else
 3381: 		        printf("Saving on counted transition count %d for %c at %d\n",
 3382: 			       trans->count, codepoint, exec->index);
 3383: #endif
 3384: 		    xmlFARegExecSave(exec);
 3385: 		}
 3386: 		if (trans->counter >= 0) {
 3387: 		    xmlRegCounterPtr counter;
 3388: 
 3389:                     /* make sure we don't go over the counter maximum value */
 3390: 		    if ((exec->counts == NULL) ||
 3391: 			(exec->comp == NULL) ||
 3392: 			(exec->comp->counters == NULL)) {
 3393: 			exec->status = -1;
 3394: 			goto error;
 3395: 		    }
 3396: 		    counter = &exec->comp->counters[trans->counter];
 3397: 		    if (exec->counts[trans->counter] >= counter->max)
 3398: 			continue; /* for loop on transitions */
 3399: #ifdef DEBUG_REGEXP_EXEC
 3400: 		    printf("Increasing count %d\n", trans->counter);
 3401: #endif
 3402: 		    exec->counts[trans->counter]++;
 3403: 		}
 3404: 		if ((trans->count >= 0) &&
 3405: 		    (trans->count < REGEXP_ALL_COUNTER)) {
 3406: 		    if (exec->counts == NULL) {
 3407: 		        exec->status = -1;
 3408: 			goto error;
 3409: 		    }
 3410: #ifdef DEBUG_REGEXP_EXEC
 3411: 		    printf("resetting count %d on transition\n",
 3412: 		           trans->count);
 3413: #endif
 3414: 		    exec->counts[trans->count] = 0;
 3415: 		}
 3416: #ifdef DEBUG_REGEXP_EXEC
 3417: 		printf("entering state %d\n", trans->to);
 3418: #endif
 3419: 		exec->state = comp->states[trans->to];
 3420: 		exec->transno = 0;
 3421: 		if (trans->atom != NULL) {
 3422: 		    exec->index += len;
 3423: 		}
 3424: 		goto progress;
 3425: 	    } else if (ret < 0) {
 3426: 		exec->status = -4;
 3427: 		break;
 3428: 	    }
 3429: 	}
 3430: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 3431: rollback:
 3432: 	    /*
 3433: 	     * Failed to find a way out
 3434: 	     */
 3435: 	    exec->determinist = 0;
 3436: #ifdef DEBUG_REGEXP_EXEC
 3437: 	    printf("rollback from state %d on %d:%c\n", exec->state->no,
 3438: 	           codepoint,codepoint);
 3439: #endif
 3440: 	    xmlFARegExecRollBack(exec);
 3441: 	}
 3442: progress:
 3443: 	continue;
 3444:     }
 3445: error:
 3446:     if (exec->rollbacks != NULL) {
 3447: 	if (exec->counts != NULL) {
 3448: 	    int i;
 3449: 
 3450: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3451: 		if (exec->rollbacks[i].counts != NULL)
 3452: 		    xmlFree(exec->rollbacks[i].counts);
 3453: 	}
 3454: 	xmlFree(exec->rollbacks);
 3455:     }
 3456:     if (exec->counts != NULL)
 3457: 	xmlFree(exec->counts);
 3458:     if (exec->status == 0)
 3459: 	return(1);
 3460:     if (exec->status == -1) {
 3461: 	if (exec->nbPush > MAX_PUSH)
 3462: 	    return(-1);
 3463: 	return(0);
 3464:     }
 3465:     return(exec->status);
 3466: }
 3467: 
 3468: /************************************************************************
 3469:  * 									*
 3470:  *	Progressive interface to the verifier one atom at a time	*
 3471:  * 									*
 3472:  ************************************************************************/
 3473: #ifdef DEBUG_ERR
 3474: static void testerr(xmlRegExecCtxtPtr exec);
 3475: #endif
 3476: 
 3477: /**
 3478:  * xmlRegNewExecCtxt:
 3479:  * @comp: a precompiled regular expression
 3480:  * @callback: a callback function used for handling progresses in the
 3481:  *            automata matching phase
 3482:  * @data: the context data associated to the callback in this context
 3483:  *
 3484:  * Build a context used for progressive evaluation of a regexp.
 3485:  *
 3486:  * Returns the new context
 3487:  */
 3488: xmlRegExecCtxtPtr
 3489: xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
 3490:     xmlRegExecCtxtPtr exec;
 3491: 
 3492:     if (comp == NULL)
 3493: 	return(NULL);
 3494:     if ((comp->compact == NULL) && (comp->states == NULL))
 3495:         return(NULL);
 3496:     exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
 3497:     if (exec == NULL) {
 3498: 	xmlRegexpErrMemory(NULL, "creating execution context");
 3499: 	return(NULL);
 3500:     }
 3501:     memset(exec, 0, sizeof(xmlRegExecCtxt));
 3502:     exec->inputString = NULL;
 3503:     exec->index = 0;
 3504:     exec->determinist = 1;
 3505:     exec->maxRollbacks = 0;
 3506:     exec->nbRollbacks = 0;
 3507:     exec->rollbacks = NULL;
 3508:     exec->status = 0;
 3509:     exec->comp = comp;
 3510:     if (comp->compact == NULL)
 3511: 	exec->state = comp->states[0];
 3512:     exec->transno = 0;
 3513:     exec->transcount = 0;
 3514:     exec->callback = callback;
 3515:     exec->data = data;
 3516:     if (comp->nbCounters > 0) {
 3517:         /*
 3518: 	 * For error handling, exec->counts is allocated twice the size
 3519: 	 * the second half is used to store the data in case of rollback
 3520: 	 */
 3521: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
 3522: 	                                 * 2);
 3523: 	if (exec->counts == NULL) {
 3524: 	    xmlRegexpErrMemory(NULL, "creating execution context");
 3525: 	    xmlFree(exec);
 3526: 	    return(NULL);
 3527: 	}
 3528:         memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
 3529: 	exec->errCounts = &exec->counts[comp->nbCounters];
 3530:     } else {
 3531: 	exec->counts = NULL;
 3532: 	exec->errCounts = NULL;
 3533:     }
 3534:     exec->inputStackMax = 0;
 3535:     exec->inputStackNr = 0;
 3536:     exec->inputStack = NULL;
 3537:     exec->errStateNo = -1;
 3538:     exec->errString = NULL;
 3539:     exec->nbPush = 0;
 3540:     return(exec);
 3541: }
 3542: 
 3543: /**
 3544:  * xmlRegFreeExecCtxt:
 3545:  * @exec: a regular expression evaulation context
 3546:  *
 3547:  * Free the structures associated to a regular expression evaulation context.
 3548:  */
 3549: void
 3550: xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
 3551:     if (exec == NULL)
 3552: 	return;
 3553: 
 3554:     if (exec->rollbacks != NULL) {
 3555: 	if (exec->counts != NULL) {
 3556: 	    int i;
 3557: 
 3558: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3559: 		if (exec->rollbacks[i].counts != NULL)
 3560: 		    xmlFree(exec->rollbacks[i].counts);
 3561: 	}
 3562: 	xmlFree(exec->rollbacks);
 3563:     }
 3564:     if (exec->counts != NULL)
 3565: 	xmlFree(exec->counts);
 3566:     if (exec->inputStack != NULL) {
 3567: 	int i;
 3568: 
 3569: 	for (i = 0;i < exec->inputStackNr;i++) {
 3570: 	    if (exec->inputStack[i].value != NULL)
 3571: 		xmlFree(exec->inputStack[i].value);
 3572: 	}
 3573: 	xmlFree(exec->inputStack);
 3574:     }
 3575:     if (exec->errString != NULL)
 3576:         xmlFree(exec->errString);
 3577:     xmlFree(exec);
 3578: }
 3579: 
 3580: static void
 3581: xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 3582: 	                    void *data) {
 3583: #ifdef DEBUG_PUSH
 3584:     printf("saving value: %d:%s\n", exec->inputStackNr, value);
 3585: #endif
 3586:     if (exec->inputStackMax == 0) {
 3587: 	exec->inputStackMax = 4;
 3588: 	exec->inputStack = (xmlRegInputTokenPtr) 
 3589: 	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
 3590: 	if (exec->inputStack == NULL) {
 3591: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3592: 	    exec->inputStackMax = 0;
 3593: 	    return;
 3594: 	}
 3595:     } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
 3596: 	xmlRegInputTokenPtr tmp;
 3597: 
 3598: 	exec->inputStackMax *= 2;
 3599: 	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
 3600: 			exec->inputStackMax * sizeof(xmlRegInputToken));
 3601: 	if (tmp == NULL) {
 3602: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3603: 	    exec->inputStackMax /= 2;
 3604: 	    return;
 3605: 	}
 3606: 	exec->inputStack = tmp;
 3607:     }
 3608:     exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
 3609:     exec->inputStack[exec->inputStackNr].data = data;
 3610:     exec->inputStackNr++;
 3611:     exec->inputStack[exec->inputStackNr].value = NULL;
 3612:     exec->inputStack[exec->inputStackNr].data = NULL;
 3613: }
 3614: 
 3615: /**
 3616:  * xmlRegStrEqualWildcard:
 3617:  * @expStr:  the string to be evaluated 
 3618:  * @valStr:  the validation string
 3619:  *
 3620:  * Checks if both strings are equal or have the same content. "*"
 3621:  * can be used as a wildcard in @valStr; "|" is used as a seperator of 
 3622:  * substrings in both @expStr and @valStr.
 3623:  *
 3624:  * Returns 1 if the comparison is satisfied and the number of substrings
 3625:  * is equal, 0 otherwise.
 3626:  */
 3627: 
 3628: static int
 3629: xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
 3630:     if (expStr == valStr) return(1);
 3631:     if (expStr == NULL) return(0);
 3632:     if (valStr == NULL) return(0);
 3633:     do {
 3634: 	/*
 3635: 	* Eval if we have a wildcard for the current item.
 3636: 	*/
 3637:         if (*expStr != *valStr) {
 3638: 	    /* if one of them starts with a wildcard make valStr be it */
 3639: 	    if (*valStr == '*') {
 3640: 	        const xmlChar *tmp;
 3641: 
 3642: 		tmp = valStr;
 3643: 		valStr = expStr;
 3644: 		expStr = tmp;
 3645: 	    }
 3646: 	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
 3647: 		do {
 3648: 		    if (*valStr == XML_REG_STRING_SEPARATOR)
 3649: 			break;
 3650: 		    valStr++;
 3651: 		} while (*valStr != 0);
 3652: 		continue;
 3653: 	    } else
 3654: 		return(0);
 3655: 	}
 3656: 	expStr++;
 3657: 	valStr++;
 3658:     } while (*valStr != 0);
 3659:     if (*expStr != 0)
 3660: 	return (0);
 3661:     else
 3662: 	return (1);
 3663: }
 3664: 
 3665: /**
 3666:  * xmlRegCompactPushString:
 3667:  * @exec: a regexp execution context
 3668:  * @comp:  the precompiled exec with a compact table
 3669:  * @value: a string token input
 3670:  * @data: data associated to the token to reuse in callbacks
 3671:  *
 3672:  * Push one input token in the execution context
 3673:  *
 3674:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 3675:  *     a negative value in case of error.
 3676:  */
 3677: static int
 3678: xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
 3679: 	                xmlRegexpPtr comp,
 3680: 	                const xmlChar *value,
 3681: 	                void *data) {
 3682:     int state = exec->index;
 3683:     int i, target;
 3684: 
 3685:     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
 3686: 	return(-1);
 3687:     
 3688:     if (value == NULL) {
 3689: 	/*
 3690: 	 * are we at a final state ?
 3691: 	 */
 3692: 	if (comp->compact[state * (comp->nbstrings + 1)] ==
 3693:             XML_REGEXP_FINAL_STATE)
 3694: 	    return(1);
 3695: 	return(0);
 3696:     }
 3697: 
 3698: #ifdef DEBUG_PUSH
 3699:     printf("value pushed: %s\n", value);
 3700: #endif
 3701: 
 3702:     /*
 3703:      * Examine all outside transitions from current state
 3704:      */
 3705:     for (i = 0;i < comp->nbstrings;i++) {
 3706: 	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 3707: 	if ((target > 0) && (target <= comp->nbstates)) {
 3708: 	    target--; /* to avoid 0 */    
 3709: 	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
 3710: 		exec->index = target;		
 3711: 		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
 3712: 		    exec->callback(exec->data, value,
 3713: 			  comp->transdata[state * comp->nbstrings + i], data);
 3714: 		}
 3715: #ifdef DEBUG_PUSH
 3716: 		printf("entering state %d\n", target);
 3717: #endif
 3718: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3719: 		    XML_REGEXP_SINK_STATE)
 3720: 		    goto error;
 3721: 
 3722: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3723: 		    XML_REGEXP_FINAL_STATE)
 3724: 		    return(1);
 3725: 		return(0);
 3726: 	    }
 3727: 	}
 3728:     }
 3729:     /*
 3730:      * Failed to find an exit transition out from current state for the
 3731:      * current token
 3732:      */
 3733: #ifdef DEBUG_PUSH
 3734:     printf("failed to find a transition for %s on state %d\n", value, state);
 3735: #endif
 3736: error:
 3737:     if (exec->errString != NULL)
 3738:         xmlFree(exec->errString);
 3739:     exec->errString = xmlStrdup(value);
 3740:     exec->errStateNo = state;
 3741:     exec->status = -1;
 3742: #ifdef DEBUG_ERR
 3743:     testerr(exec);
 3744: #endif
 3745:     return(-1);
 3746: }
 3747: 
 3748: /**
 3749:  * xmlRegExecPushStringInternal:
 3750:  * @exec: a regexp execution context or NULL to indicate the end
 3751:  * @value: a string token input
 3752:  * @data: data associated to the token to reuse in callbacks
 3753:  * @compound: value was assembled from 2 strings
 3754:  *
 3755:  * Push one input token in the execution context
        *
        * A NULL @exec or a context without compiled regexp yields -1, and a
        * context whose status is already non-zero simply returns that status.
        * When the compiled regexp has a compact table the call is forwarded
        * to xmlRegCompactPushString(); otherwise the transitions of the
        * current state are tried in order, saving rollback points and
        * queueing tokens on exec->inputStack when other transitions remain
        * to be tried later.
        *
        * A NULL @value stands for the end of the input: 1 is returned at once
        * if the current state is final, otherwise the loop only goes on when
        * counters exist (exec->counts != NULL) and rolls back if not.
        *
        * Transitions on negated atoms are only taken when @compound is
        * non-zero.
 3756:  *
 3757:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 3758:  *     a negative value in case of error.
 3759:  */
 3760: static int
 3761: xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
 3762: 	                     void *data, int compound) {
 3763:     xmlRegTransPtr trans;
 3764:     xmlRegAtomPtr atom;
 3765:     int ret;
 3766:     int final = 0;
 3767:     int progress = 1;
 3768: 
 3769:     if (exec == NULL)
 3770: 	return(-1);
 3771:     if (exec->comp == NULL)
 3772: 	return(-1);
           /* a non-zero status is sticky: it is returned for every later push */
 3773:     if (exec->status != 0)
 3774: 	return(exec->status);
 3775: 
           /* compact (table driven) regexps have their own push routine */
 3776:     if (exec->comp->compact != NULL)
 3777: 	return(xmlRegCompactPushString(exec, exec->comp, value, data));
 3778: 
 3779:     if (value == NULL) {
 3780:         if (exec->state->type == XML_REGEXP_FINAL_STATE)
 3781: 	    return(1);
 3782: 	final = 1;
 3783:     }
 3784: 
 3785: #ifdef DEBUG_PUSH
 3786:     printf("value pushed: %s\n", value);
 3787: #endif
 3788:     /*
 3789:      * If we have an active rollback stack push the new value there
 3790:      * and get back to where we were left
 3791:      */
 3792:     if ((value != NULL) && (exec->inputStackNr > 0)) {
 3793: 	xmlFARegExecSaveInputString(exec, value, data);
 3794: 	value = exec->inputStack[exec->index].value;
 3795: 	data = exec->inputStack[exec->index].data;
 3796: #ifdef DEBUG_PUSH
 3797: 	printf("value loaded: %s\n", value);
 3798: #endif
 3799:     }
 3800: 
 3801:     while ((exec->status == 0) &&
 3802: 	   ((value != NULL) ||
 3803: 	    ((final == 1) &&
 3804: 	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 3805: 
 3806: 	/*
 3807: 	 * End of input on non-terminal state, rollback, however we may
 3808: 	 * still have epsilon like transition for counted transitions
 3809: 	 * on counters, in that case don't break too early.
 3810: 	 */
 3811: 	if ((value == NULL) && (exec->counts == NULL))
 3812: 	    goto rollback;
 3813: 
 3814: 	exec->transcount = 0;
 3815: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 3816: 	    trans = &exec->state->trans[exec->transno];
 3817: 	    if (trans->to < 0)
 3818: 		continue;
 3819: 	    atom = trans->atom;
 3820: 	    ret = 0;
 3821: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 3822: 		int i;
 3823: 		int count;
 3824: 		xmlRegTransPtr t;
 3825: 		xmlRegCounterPtr counter;
 3826: 
 3827: 		ret = 0;
 3828: 
 3829: #ifdef DEBUG_PUSH
 3830: 		printf("testing all lax %d\n", trans->count);
 3831: #endif
 3832: 		/*
 3833: 		 * Check all counted transitions from the current state
 3834: 		 */
 3835: 		if ((value == NULL) && (final)) {
 3836: 		    ret = 1;
 3837: 		} else if (value != NULL) {
 3838: 		    for (i = 0;i < exec->state->nbTrans;i++) {
 3839: 			t = &exec->state->trans[i];
 3840: 			if ((t->counter < 0) || (t == trans))
 3841: 			    continue;
 3842: 			counter = &exec->comp->counters[t->counter];
 3843: 			count = exec->counts[t->counter];
 3844: 			if ((count < counter->max) && 
 3845: 		            (t->atom != NULL) &&
 3846: 			    (xmlStrEqual(value, t->atom->valuep))) {
 3847: 			    ret = 0;
 3848: 			    break;
 3849: 			}
       			/*
       			 * NOTE(review): every condition of the test below is
       			 * also required by the test above, which breaks out
       			 * with ret = 0 first, so this branch looks
       			 * unreachable as written.
       			 */
 3850: 			if ((count >= counter->min) &&
 3851: 			    (count < counter->max) &&
 3852: 			    (t->atom != NULL) &&
 3853: 			    (xmlStrEqual(value, t->atom->valuep))) {
 3854: 			    ret = 1;
 3855: 			    break;
 3856: 			}
 3857: 		    }
 3858: 		}
 3859: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 3860: 		int i;
 3861: 		int count;
 3862: 		xmlRegTransPtr t;
 3863: 		xmlRegCounterPtr counter;
 3864: 
 3865: 		ret = 1;
 3866: 
 3867: #ifdef DEBUG_PUSH
 3868: 		printf("testing all %d\n", trans->count);
 3869: #endif
 3870: 		/*
 3871: 		 * Check all counted transitions from the current state
 3872: 		 */
 3873: 		for (i = 0;i < exec->state->nbTrans;i++) {
 3874:                     t = &exec->state->trans[i];
 3875: 		    if ((t->counter < 0) || (t == trans))
 3876: 			continue;
 3877:                     counter = &exec->comp->counters[t->counter];
 3878: 		    count = exec->counts[t->counter];
 3879: 		    if ((count < counter->min) || (count > counter->max)) {
 3880: 			ret = 0;
 3881: 			break;
 3882: 		    }
 3883: 		}
 3884: 	    } else if (trans->count >= 0) {
 3885: 		int count;
 3886: 		xmlRegCounterPtr counter;
 3887: 
 3888: 		/*
 3889: 		 * A counted transition.
 3890: 		 */
 3891: 
 3892: 		count = exec->counts[trans->count];
 3893: 		counter = &exec->comp->counters[trans->count];
 3894: #ifdef DEBUG_PUSH
 3895: 		printf("testing count %d: val %d, min %d, max %d\n",
 3896: 		       trans->count, count, counter->min,  counter->max);
 3897: #endif
 3898: 		ret = ((count >= counter->min) && (count <= counter->max));
 3899: 	    } else if (atom == NULL) {
 3900: 		fprintf(stderr, "epsilon transition left at runtime\n");
 3901: 		exec->status = -2;
 3902: 		break;
 3903: 	    } else if (value != NULL) {
 3904: 		ret = xmlRegStrEqualWildcard(atom->valuep, value);
       		/*
       		 * a negated atom accepts the tokens which do not match,
       		 * but only when the token is a compound one
       		 */
 3905: 		if (atom->neg) {
 3906: 		    ret = !ret;
 3907: 		    if (!compound)
 3908: 		        ret = 0;
 3909: 		}
       		/* refuse the match once the counter fed by it is full */
 3910: 		if ((ret == 1) && (trans->counter >= 0)) {
 3911: 		    xmlRegCounterPtr counter;
 3912: 		    int count;
 3913: 
 3914: 		    count = exec->counts[trans->counter];
 3915: 		    counter = &exec->comp->counters[trans->counter];
 3916: 		    if (count >= counter->max)
 3917: 			ret = 0;
 3918: 		}
 3919: 
 3920: 		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
 3921: 		    xmlRegStatePtr to = exec->comp->states[trans->to];
 3922: 
 3923: 		    /*
 3924: 		     * this is a multiple input sequence
 3925: 		     */
 3926: 		    if (exec->state->nbTrans > exec->transno + 1) {
 3927: 			if (exec->inputStackNr <= 0) {
 3928: 			    xmlFARegExecSaveInputString(exec, value, data);
 3929: 			}
 3930: 			xmlFARegExecSave(exec);
 3931: 		    }
 3932: 		    exec->transcount = 1;
 3933: 		    do {
 3934: 			/*
 3935: 			 * Try to progress as much as possible on the input
 3936: 			 */
 3937: 			if (exec->transcount == atom->max) {
 3938: 			    break;
 3939: 			}
 3940: 			exec->index++;
 3941: 			value = exec->inputStack[exec->index].value;
 3942: 			data = exec->inputStack[exec->index].data;
 3943: #ifdef DEBUG_PUSH
 3944: 			printf("value loaded: %s\n", value);
 3945: #endif
 3946: 
 3947: 			/*
 3948: 			 * End of input: stop here
 3949: 			 */
 3950: 			if (value == NULL) {
 3951: 			    exec->index --;
 3952: 			    break;
 3953: 			}
 3954: 			if (exec->transcount >= atom->min) {
 3955: 			    int transno = exec->transno;
 3956: 			    xmlRegStatePtr state = exec->state;
 3957: 
 3958: 			    /*
 3959: 			     * The transition is acceptable save it
 3960: 			     */
 3961: 			    exec->transno = -1; /* trick */
 3962: 			    exec->state = to;
 3963: 			    if (exec->inputStackNr <= 0) {
 3964: 				xmlFARegExecSaveInputString(exec, value, data);
 3965: 			    }
 3966: 			    xmlFARegExecSave(exec);
       			    /* put back the position saved in the locals above */
 3967: 			    exec->transno = transno;
 3968: 			    exec->state = state;
 3969: 			}
 3970: 			ret = xmlStrEqual(value, atom->valuep);
 3971: 			exec->transcount++;
 3972: 		    } while (ret == 1);
 3973: 		    if (exec->transcount < atom->min)
 3974: 			ret = 0;
 3975: 
 3976: 		    /*
 3977: 		     * If the last check failed but one transition was found
 3978: 		     * possible, rollback
 3979: 		     */
 3980: 		    if (ret < 0)
 3981: 			ret = 0;
 3982: 		    if (ret == 0) {
 3983: 			goto rollback;
 3984: 		    }
 3985: 		}
 3986: 	    }
 3987: 	    if (ret == 1) {
       		/* the transition is taken: notify, save, count, then move */
 3988: 		if ((exec->callback != NULL) && (atom != NULL) &&
 3989: 			(data != NULL)) {
 3990: 		    exec->callback(exec->data, atom->valuep,
 3991: 			           atom->data, data);
 3992: 		}
       		/* other transitions remain untried: keep a rollback point */
 3993: 		if (exec->state->nbTrans > exec->transno + 1) {
 3994: 		    if (exec->inputStackNr <= 0) {
 3995: 			xmlFARegExecSaveInputString(exec, value, data);
 3996: 		    }
 3997: 		    xmlFARegExecSave(exec);
 3998: 		}
 3999: 		if (trans->counter >= 0) {
 4000: #ifdef DEBUG_PUSH
 4001: 		    printf("Increasing count %d\n", trans->counter);
 4002: #endif
 4003: 		    exec->counts[trans->counter]++;
 4004: 		}
 4005: 		if ((trans->count >= 0) &&
 4006: 		    (trans->count < REGEXP_ALL_COUNTER)) {
 4007: #ifdef DEBUG_REGEXP_EXEC
 4008: 		    printf("resetting count %d on transition\n",
 4009: 		           trans->count);
 4010: #endif
 4011: 		    exec->counts[trans->count] = 0;
 4012: 		}
 4013: #ifdef DEBUG_PUSH
 4014: 		printf("entering state %d\n", trans->to);
 4015: #endif
 4016:                 if ((exec->comp->states[trans->to] != NULL) &&
 4017: 		    (exec->comp->states[trans->to]->type ==
 4018: 		     XML_REGEXP_SINK_STATE)) {
 4019: 		    /*
 4020: 		     * entering a sink state, save the current state as error
 4021: 		     * state.
 4022: 		     */
 4023: 		    if (exec->errString != NULL)
 4024: 			xmlFree(exec->errString);
 4025: 		    exec->errString = xmlStrdup(value);
 4026: 		    exec->errState = exec->state;
 4027: 		    memcpy(exec->errCounts, exec->counts,
 4028: 			   exec->comp->nbCounters * sizeof(int));
 4029: 		}
 4030: 		exec->state = exec->comp->states[trans->to];
 4031: 		exec->transno = 0;
       		/*
       		 * a transition with an atom used up the token: load the next
       		 * queued one, or NULL when nothing is queued
       		 */
 4032: 		if (trans->atom != NULL) {
 4033: 		    if (exec->inputStack != NULL) {
 4034: 			exec->index++;
 4035: 			if (exec->index < exec->inputStackNr) {
 4036: 			    value = exec->inputStack[exec->index].value;
 4037: 			    data = exec->inputStack[exec->index].data;
 4038: #ifdef DEBUG_PUSH
 4039: 			    printf("value loaded: %s\n", value);
 4040: #endif
 4041: 			} else {
 4042: 			    value = NULL;
 4043: 			    data = NULL;
 4044: #ifdef DEBUG_PUSH
 4045: 			    printf("end of input\n");
 4046: #endif
 4047: 			}
 4048: 		    } else {
 4049: 			value = NULL;
 4050: 			data = NULL;
 4051: #ifdef DEBUG_PUSH
 4052: 			printf("end of input\n");
 4053: #endif
 4054: 		    }
 4055: 		}
 4056: 		goto progress;
 4057: 	    } else if (ret < 0) {
 4058: 		exec->status = -4;
 4059: 		break;
 4060: 	    }
 4061: 	}
 4062: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 4063: rollback:
 4064:             /*
 4065: 	     * if we didn't yet rollback on the current input
 4066: 	     * store the current state as the error state.
 4067: 	     */
 4068: 	    if ((progress) && (exec->state != NULL) &&
 4069: 	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
 4070: 	        progress = 0;
 4071: 		if (exec->errString != NULL)
 4072: 		    xmlFree(exec->errString);
 4073: 		exec->errString = xmlStrdup(value);
 4074: 		exec->errState = exec->state;
 4075: 		memcpy(exec->errCounts, exec->counts,
 4076: 		       exec->comp->nbCounters * sizeof(int));
 4077: 	    }
 4078: 
 4079: 	    /*
 4080: 	     * Failed to find a way out
 4081: 	     */
 4082: 	    exec->determinist = 0;
 4083: 	    xmlFARegExecRollBack(exec);
       	    /* reload the token at the index in effect after the rollback */
 4084: 	    if (exec->status == 0) {
 4085: 		value = exec->inputStack[exec->index].value;
 4086: 		data = exec->inputStack[exec->index].data;
 4087: #ifdef DEBUG_PUSH
 4088: 		printf("value loaded: %s\n", value);
 4089: #endif
 4090: 	    }
 4091: 	}
 4092: 	continue;
 4093: progress:
 4094:         progress = 1;
 4095: 	continue;
 4096:     }
           /* no error: 1 if the run stopped on a final state, 0 otherwise */
 4097:     if (exec->status == 0) {
 4098:         return(exec->state->type == XML_REGEXP_FINAL_STATE);
 4099:     }
 4100: #ifdef DEBUG_ERR
 4101:     if (exec->status < 0) {
 4102: 	testerr(exec);
 4103:     }
 4104: #endif
 4105:     return(exec->status);
 4106: }
 4107: 
 4108: /**
 4109:  * xmlRegExecPushString:
 4110:  * @exec: a regexp execution context or NULL to indicate the end
 4111:  * @value: a string token input
 4112:  * @data: data associated to the token to reuse in callbacks
 4113:  *
 4114:  * Push one input token in the execution context
 4115:  *
 4116:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4117:  *     a negative value in case of error.
 4118:  */
 4119: int
 4120: xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4121: 	             void *data) {
 4122:     return(xmlRegExecPushStringInternal(exec, value, data, 0));
 4123: }
 4124: 
 4125: /**
 4126:  * xmlRegExecPushString2:
 4127:  * @exec: a regexp execution context or NULL to indicate the end
 4128:  * @value: the first string token input
 4129:  * @value2: the second string token input
 4130:  * @data: data associated to the token to reuse in callbacks
 4131:  *
 4132:  * Push one input token in the execution context
 4133:  *
 4134:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4135:  *     a negative value in case of error.
 4136:  */
 4137: int
 4138: xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4139:                       const xmlChar *value2, void *data) {
 4140:     xmlChar buf[150];
 4141:     int lenn, lenp, ret;
 4142:     xmlChar *str;
 4143: 
 4144:     if (exec == NULL)
 4145: 	return(-1);
 4146:     if (exec->comp == NULL)
 4147: 	return(-1);
 4148:     if (exec->status != 0)
 4149: 	return(exec->status);
 4150: 
 4151:     if (value2 == NULL)
 4152:         return(xmlRegExecPushString(exec, value, data));
 4153: 
 4154:     lenn = strlen((char *) value2);
 4155:     lenp = strlen((char *) value);
 4156: 
 4157:     if (150 < lenn + lenp + 2) {
 4158: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 4159: 	if (str == NULL) {
 4160: 	    exec->status = -1;
 4161: 	    return(-1);
 4162: 	}
 4163:     } else {
 4164: 	str = buf;
 4165:     }
 4166:     memcpy(&str[0], value, lenp);
 4167:     str[lenp] = XML_REG_STRING_SEPARATOR;
 4168:     memcpy(&str[lenp + 1], value2, lenn);
 4169:     str[lenn + lenp + 1] = 0;
 4170: 
 4171:     if (exec->comp->compact != NULL)
 4172: 	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
 4173:     else
 4174:         ret = xmlRegExecPushStringInternal(exec, str, data, 1);
 4175: 
 4176:     if (str != buf)
 4177:         xmlFree(str);
 4178:     return(ret);
 4179: }
 4180: 
 4181: /**
 4182:  * xmlRegExecGetValues:
 4183:  * @exec: a regexp execution context
 4184:  * @err: error extraction or normal one
 4185:  * @nbval: pointer to the number of accepted values IN/OUT
 4186:  * @nbneg: return number of negative transitions
 4187:  * @values: pointer to the array of acceptable values
 4188:  * @terminal: return value if this was a terminal state
 4189:  *
 4190:  * Extract informations from the regexp execution, internal routine to
 4191:  * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
 4192:  *
 4193:  * Returns: 0 in case of success or -1 in case of error.
 4194:  */
 4195: static int
 4196: xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 4197:                     int *nbval, int *nbneg,
 4198: 		    xmlChar **values, int *terminal) {
 4199:     int maxval;
 4200:     int nb = 0;
 4201: 
 4202:     if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
 4203:         (values == NULL) || (*nbval <= 0))
 4204:         return(-1);
 4205: 
 4206:     maxval = *nbval;
 4207:     *nbval = 0;
 4208:     *nbneg = 0;
 4209:     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
 4210:         xmlRegexpPtr comp;
 4211: 	int target, i, state;
 4212: 
 4213:         comp = exec->comp;
 4214: 
 4215: 	if (err) {
 4216: 	    if (exec->errStateNo == -1) return(-1);
 4217: 	    state = exec->errStateNo;
 4218: 	} else {
 4219: 	    state = exec->index;
 4220: 	}
 4221: 	if (terminal != NULL) {
 4222: 	    if (comp->compact[state * (comp->nbstrings + 1)] ==
 4223: 	        XML_REGEXP_FINAL_STATE)
 4224: 		*terminal = 1;
 4225: 	    else
 4226: 		*terminal = 0;
 4227: 	}
 4228: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4229: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4230: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4231: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
 4232: 		 XML_REGEXP_SINK_STATE)) {
 4233: 	        values[nb++] = comp->stringMap[i];
 4234: 		(*nbval)++;
 4235: 	    }
 4236: 	}
 4237: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4238: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4239: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4240: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
 4241: 		 XML_REGEXP_SINK_STATE)) {
 4242: 	        values[nb++] = comp->stringMap[i];
 4243: 		(*nbneg)++;
 4244: 	    }
 4245: 	}
 4246:     } else {
 4247:         int transno;
 4248: 	xmlRegTransPtr trans;
 4249: 	xmlRegAtomPtr atom;
 4250: 	xmlRegStatePtr state;
 4251: 
 4252: 	if (terminal != NULL) {
 4253: 	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
 4254: 		*terminal = 1;
 4255: 	    else
 4256: 		*terminal = 0;
 4257: 	}
 4258: 
 4259: 	if (err) {
 4260: 	    if (exec->errState == NULL) return(-1);
 4261: 	    state = exec->errState;
 4262: 	} else {
 4263: 	    if (exec->state == NULL) return(-1);
 4264: 	    state = exec->state;
 4265: 	}
 4266: 	for (transno = 0;
 4267: 	     (transno < state->nbTrans) && (nb < maxval);
 4268: 	     transno++) {
 4269: 	    trans = &state->trans[transno];
 4270: 	    if (trans->to < 0)
 4271: 		continue;
 4272: 	    atom = trans->atom;
 4273: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4274: 		continue;
 4275: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4276: 	        /* this should not be reached but ... */
 4277: 	        TODO;
 4278: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4279: 	        /* this should not be reached but ... */
 4280: 	        TODO;
 4281: 	    } else if (trans->counter >= 0) {
 4282: 		xmlRegCounterPtr counter = NULL;
 4283: 		int count;
 4284: 
 4285: 		if (err)
 4286: 		    count = exec->errCounts[trans->counter];
 4287: 		else
 4288: 		    count = exec->counts[trans->counter];
 4289: 		if (exec->comp != NULL)
 4290: 		    counter = &exec->comp->counters[trans->counter];
 4291: 		if ((counter == NULL) || (count < counter->max)) {
 4292: 		    if (atom->neg)
 4293: 			values[nb++] = (xmlChar *) atom->valuep2;
 4294: 		    else
 4295: 			values[nb++] = (xmlChar *) atom->valuep;
 4296: 		    (*nbval)++;
 4297: 		}
 4298: 	    } else {
 4299:                 if ((exec->comp->states[trans->to] != NULL) &&
 4300: 		    (exec->comp->states[trans->to]->type !=
 4301: 		     XML_REGEXP_SINK_STATE)) {
 4302: 		    if (atom->neg)
 4303: 			values[nb++] = (xmlChar *) atom->valuep2;
 4304: 		    else
 4305: 			values[nb++] = (xmlChar *) atom->valuep;
 4306: 		    (*nbval)++;
 4307: 		}
 4308: 	    } 
 4309: 	}
 4310: 	for (transno = 0;
 4311: 	     (transno < state->nbTrans) && (nb < maxval);
 4312: 	     transno++) {
 4313: 	    trans = &state->trans[transno];
 4314: 	    if (trans->to < 0)
 4315: 		continue;
 4316: 	    atom = trans->atom;
 4317: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4318: 		continue;
 4319: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4320: 	        continue;
 4321: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4322: 	        continue;
 4323: 	    } else if (trans->counter >= 0) {
 4324: 	        continue;
 4325: 	    } else {
 4326:                 if ((exec->comp->states[trans->to] != NULL) &&
 4327: 		    (exec->comp->states[trans->to]->type ==
 4328: 		     XML_REGEXP_SINK_STATE)) {
 4329: 		    if (atom->neg)
 4330: 			values[nb++] = (xmlChar *) atom->valuep2;
 4331: 		    else
 4332: 			values[nb++] = (xmlChar *) atom->valuep;
 4333: 		    (*nbneg)++;
 4334: 		}
 4335: 	    } 
 4336: 	}
 4337:     }
 4338:     return(0);
 4339: }
 4340: 
 4341: /**
 4342:  * xmlRegExecNextValues:
 4343:  * @exec: a regexp execution context
 4344:  * @nbval: pointer to the number of accepted values IN/OUT
 4345:  * @nbneg: return number of negative transitions
 4346:  * @values: pointer to the array of acceptable values
 4347:  * @terminal: return value if this was a terminal state
 4348:  *
 4349:  * Extract informations from the regexp execution,
 4350:  * the parameter @values must point to an array of @nbval string pointers
 4351:  * on return nbval will contain the number of possible strings in that
 4352:  * state and the @values array will be updated with them. The string values
 4353:  * returned will be freed with the @exec context and don't need to be
 4354:  * deallocated.
 4355:  *
 4356:  * Returns: 0 in case of success or -1 in case of error.
 4357:  */
 4358: int
 4359: xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
 4360:                      xmlChar **values, int *terminal) {
 4361:     return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
 4362: }
 4363: 
 4364: /**
 4365:  * xmlRegExecErrInfo:
 4366:  * @exec: a regexp execution context generating an error
 4367:  * @string: return value for the error string
 4368:  * @nbval: pointer to the number of accepted values IN/OUT
 4369:  * @nbneg: return number of negative transitions
 4370:  * @values: pointer to the array of acceptable values
 4371:  * @terminal: return value if this was a terminal state
 4372:  *
 4373:  * Extract error informations from the regexp execution, the parameter
 4374:  * @string will be updated with the value pushed and not accepted,
 4375:  * the parameter @values must point to an array of @nbval string pointers
 4376:  * on return nbval will contain the number of possible strings in that
 4377:  * state and the @values array will be updated with them. The string values
 4378:  * returned will be freed with the @exec context and don't need to be
 4379:  * deallocated.
 4380:  *
 4381:  * Returns: 0 in case of success or -1 in case of error.
 4382:  */
 4383: int
 4384: xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
 4385:                   int *nbval, int *nbneg, xmlChar **values, int *terminal) {
 4386:     if (exec == NULL)
 4387:         return(-1);
 4388:     if (string != NULL) {
 4389:         if (exec->status != 0)
 4390: 	    *string = exec->errString;
 4391: 	else
 4392: 	    *string = NULL;
 4393:     }
 4394:     return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
 4395: }
 4396: 
 4397: #ifdef DEBUG_ERR
 4398: static void testerr(xmlRegExecCtxtPtr exec) {
 4399:     const xmlChar *string;
 4400:     xmlChar *values[5];
 4401:     int nb = 5;
 4402:     int nbneg;
 4403:     int terminal;
 4404:     xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
 4405: }
 4406: #endif
 4407: 
 4408: #if 0
       /*
        * xmlRegExecPushChar:
        * @exec: a regexp execution context
        * @UCS: a character value (not referenced in the body below)
        *
        * Disabled code: everything up to the matching #endif is excluded from
        * the build by the #if 0 above. It is a character based variant of the
        * string push loop, reading its input from exec->inputString at
        * exec->index.
        *
        * NOTE(review): the function is declared as returning int but has no
        * return statement after its loop, so it cannot be enabled as is.
        */
 4409: static int
 4410: xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
 4411:     xmlRegTransPtr trans;
 4412:     xmlRegAtomPtr atom;
 4413:     int ret;
 4414:     int codepoint, len;
 4415: 
 4416:     if (exec == NULL)
 4417: 	return(-1);
 4418:     if (exec->status != 0)
 4419: 	return(exec->status);
 4420: 
 4421:     while ((exec->status == 0) &&
 4422: 	   ((exec->inputString[exec->index] != 0) ||
 4423: 	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {
 4424: 
 4425: 	/*
 4426: 	 * End of input on non-terminal state, rollback, however we may
 4427: 	 * still have epsilon like transition for counted transitions
 4428: 	 * on counters, in that case don't break too early.
 4429: 	 */
 4430: 	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
 4431: 	    goto rollback;
 4432: 
 4433: 	exec->transcount = 0;
 4434: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 4435: 	    trans = &exec->state->trans[exec->transno];
 4436: 	    if (trans->to < 0)
 4437: 		continue;
 4438: 	    atom = trans->atom;
 4439: 	    ret = 0;
 4440: 	    if (trans->count >= 0) {
 4441: 		int count;
 4442: 		xmlRegCounterPtr counter;
 4443: 
 4444: 		/*
 4445: 		 * A counted transition.
 4446: 		 */
 4447: 
 4448: 		count = exec->counts[trans->count];
 4449: 		counter = &exec->comp->counters[trans->count];
 4450: #ifdef DEBUG_REGEXP_EXEC
 4451: 		printf("testing count %d: val %d, min %d, max %d\n",
 4452: 		       trans->count, count, counter->min,  counter->max);
 4453: #endif
 4454: 		ret = ((count >= counter->min) && (count <= counter->max));
 4455: 	    } else if (atom == NULL) {
 4456: 		fprintf(stderr, "epsilon transition left at runtime\n");
 4457: 		exec->status = -2;
 4458: 		break;
 4459: 	    } else if (exec->inputString[exec->index] != 0) {
       		/* decode the character at the current position, len gets its size */
 4460:                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
 4461: 		ret = xmlRegCheckCharacter(atom, codepoint);
 4462: 		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
 4463: 		    xmlRegStatePtr to = exec->comp->states[trans->to];
 4464: 
 4465: 		    /*
 4466: 		     * this is a multiple input sequence
 4467: 		     */
 4468: 		    if (exec->state->nbTrans > exec->transno + 1) {
 4469: 			xmlFARegExecSave(exec);
 4470: 		    }
 4471: 		    exec->transcount = 1;
 4472: 		    do {
 4473: 			/*
 4474: 			 * Try to progress as much as possible on the input
 4475: 			 */
 4476: 			if (exec->transcount == atom->max) {
 4477: 			    break;
 4478: 			}
 4479: 			exec->index += len;
 4480: 			/*
 4481: 			 * End of input: stop here
 4482: 			 */
 4483: 			if (exec->inputString[exec->index] == 0) {
 4484: 			    exec->index -= len;
 4485: 			    break;
 4486: 			}
 4487: 			if (exec->transcount >= atom->min) {
 4488: 			    int transno = exec->transno;
 4489: 			    xmlRegStatePtr state = exec->state;
 4490: 
 4491: 			    /*
 4492: 			     * The transition is acceptable save it
 4493: 			     */
 4494: 			    exec->transno = -1; /* trick */
 4495: 			    exec->state = to;
 4496: 			    xmlFARegExecSave(exec);
 4497: 			    exec->transno = transno;
 4498: 			    exec->state = state;
 4499: 			}
 4500: 			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
 4501: 				              len);
 4502: 			ret = xmlRegCheckCharacter(atom, codepoint);
 4503: 			exec->transcount++;
 4504: 		    } while (ret == 1);
 4505: 		    if (exec->transcount < atom->min)
 4506: 			ret = 0;
 4507: 
 4508: 		    /*
 4509: 		     * If the last check failed but one transition was found
 4510: 		     * possible, rollback
 4511: 		     */
 4512: 		    if (ret < 0)
 4513: 			ret = 0;
 4514: 		    if (ret == 0) {
 4515: 			goto rollback;
 4516: 		    }
 4517: 		}
 4518: 	    }
 4519: 	    if (ret == 1) {
 4520: 		if (exec->state->nbTrans > exec->transno + 1) {
 4521: 		    xmlFARegExecSave(exec);
 4522: 		}
 4523: 		/*
 4524: 		 * restart count for expressions like this ((abc){2})*
 4525: 		 */
 4526: 		if (trans->count >= 0) {
 4527: #ifdef DEBUG_REGEXP_EXEC
 4528: 		    printf("Reset count %d\n", trans->count);
 4529: #endif
 4530: 		    exec->counts[trans->count] = 0;
 4531: 		}
 4532: 		if (trans->counter >= 0) {
 4533: #ifdef DEBUG_REGEXP_EXEC
 4534: 		    printf("Increasing count %d\n", trans->counter);
 4535: #endif
 4536: 		    exec->counts[trans->counter]++;
 4537: 		}
 4538: #ifdef DEBUG_REGEXP_EXEC
 4539: 		printf("entering state %d\n", trans->to);
 4540: #endif
 4541: 		exec->state = exec->comp->states[trans->to];
 4542: 		exec->transno = 0;
 4543: 		if (trans->atom != NULL) {
 4544: 		    exec->index += len;
 4545: 		}
 4546: 		goto progress;
 4547: 	    } else if (ret < 0) {
 4548: 		exec->status = -4;
 4549: 		break;
 4550: 	    }
 4551: 	}
 4552: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 4553: rollback:
 4554: 	    /*
 4555: 	     * Failed to find a way out
 4556: 	     */
 4557: 	    exec->determinist = 0;
 4558: 	    xmlFARegExecRollBack(exec);
 4559: 	}
 4560: progress:
 4561: 	continue;
 4562:     }
 4563: }
 4564: #endif
 4565: /************************************************************************
 4566:  * 									*
 4567:  *	Parser for the Schemas Datatype Regular Expressions		*
 4568:  *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
 4569:  * 									*
 4570:  ************************************************************************/
 4571: 
 4572: /**
 4573:  * xmlFAIsChar:
 4574:  * @ctxt:  a regexp parser context
 4575:  *
 4576:  * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
 4577:  */
 4578: static int
 4579: xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
 4580:     int cur;
 4581:     int len;
 4582: 
 4583:     cur = CUR_SCHAR(ctxt->cur, len);
 4584:     if ((cur == '.') || (cur == '\\') || (cur == '?') ||
 4585: 	(cur == '*') || (cur == '+') || (cur == '(') ||
 4586: 	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
 4587: 	(cur == 0x5D) || (cur == 0))
 4588: 	return(-1);
 4589:     return(cur);
 4590: }
 4591: 
 4592: /**
 4593:  * xmlFAParseCharProp:
 4594:  * @ctxt:  a regexp parser context
 4595:  *
 4596:  * [27]   charProp   ::=   IsCategory | IsBlock
 4597:  * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
 4598:  *                       Separators | Symbols | Others 
 4599:  * [29]   Letters   ::=   'L' [ultmo]?
 4600:  * [30]   Marks   ::=   'M' [nce]?
 4601:  * [31]   Numbers   ::=   'N' [dlo]?
 4602:  * [32]   Punctuation   ::=   'P' [cdseifo]?
 4603:  * [33]   Separators   ::=   'Z' [slp]?
 4604:  * [34]   Symbols   ::=   'S' [mcko]?
 4605:  * [35]   Others   ::=   'C' [cfon]?
 4606:  * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
 4607:  */
 4608: static void
 4609: xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
 4610:     int cur;
 4611:     xmlRegAtomType type = (xmlRegAtomType) 0;
 4612:     xmlChar *blockName = NULL;
 4613:     
 4614:     cur = CUR;
 4615:     if (cur == 'L') {
 4616: 	NEXT;
 4617: 	cur = CUR;
 4618: 	if (cur == 'u') {
 4619: 	    NEXT;
 4620: 	    type = XML_REGEXP_LETTER_UPPERCASE;
 4621: 	} else if (cur == 'l') {
 4622: 	    NEXT;
 4623: 	    type = XML_REGEXP_LETTER_LOWERCASE;
 4624: 	} else if (cur == 't') {
 4625: 	    NEXT;
 4626: 	    type = XML_REGEXP_LETTER_TITLECASE;
 4627: 	} else if (cur == 'm') {
 4628: 	    NEXT;
 4629: 	    type = XML_REGEXP_LETTER_MODIFIER;
 4630: 	} else if (cur == 'o') {
 4631: 	    NEXT;
 4632: 	    type = XML_REGEXP_LETTER_OTHERS;
 4633: 	} else {
 4634: 	    type = XML_REGEXP_LETTER;
 4635: 	}
 4636:     } else if (cur == 'M') {
 4637: 	NEXT;
 4638: 	cur = CUR;
 4639: 	if (cur == 'n') {
 4640: 	    NEXT;
 4641: 	    /* nonspacing */
 4642: 	    type = XML_REGEXP_MARK_NONSPACING;
 4643: 	} else if (cur == 'c') {
 4644: 	    NEXT;
 4645: 	    /* spacing combining */
 4646: 	    type = XML_REGEXP_MARK_SPACECOMBINING;
 4647: 	} else if (cur == 'e') {
 4648: 	    NEXT;
 4649: 	    /* enclosing */
 4650: 	    type = XML_REGEXP_MARK_ENCLOSING;
 4651: 	} else {
 4652: 	    /* all marks */
 4653: 	    type = XML_REGEXP_MARK;
 4654: 	}
 4655:     } else if (cur == 'N') {
 4656: 	NEXT;
 4657: 	cur = CUR;
 4658: 	if (cur == 'd') {
 4659: 	    NEXT;
 4660: 	    /* digital */
 4661: 	    type = XML_REGEXP_NUMBER_DECIMAL;
 4662: 	} else if (cur == 'l') {
 4663: 	    NEXT;
 4664: 	    /* letter */
 4665: 	    type = XML_REGEXP_NUMBER_LETTER;
 4666: 	} else if (cur == 'o') {
 4667: 	    NEXT;
 4668: 	    /* other */
 4669: 	    type = XML_REGEXP_NUMBER_OTHERS;
 4670: 	} else {
 4671: 	    /* all numbers */
 4672: 	    type = XML_REGEXP_NUMBER;
 4673: 	}
 4674:     } else if (cur == 'P') {
 4675: 	NEXT;
 4676: 	cur = CUR;
 4677: 	if (cur == 'c') {
 4678: 	    NEXT;
 4679: 	    /* connector */
 4680: 	    type = XML_REGEXP_PUNCT_CONNECTOR;
 4681: 	} else if (cur == 'd') {
 4682: 	    NEXT;
 4683: 	    /* dash */
 4684: 	    type = XML_REGEXP_PUNCT_DASH;
 4685: 	} else if (cur == 's') {
 4686: 	    NEXT;
 4687: 	    /* open */
 4688: 	    type = XML_REGEXP_PUNCT_OPEN;
 4689: 	} else if (cur == 'e') {
 4690: 	    NEXT;
 4691: 	    /* close */
 4692: 	    type = XML_REGEXP_PUNCT_CLOSE;
 4693: 	} else if (cur == 'i') {
 4694: 	    NEXT;
 4695: 	    /* initial quote */
 4696: 	    type = XML_REGEXP_PUNCT_INITQUOTE;
 4697: 	} else if (cur == 'f') {
 4698: 	    NEXT;
 4699: 	    /* final quote */
 4700: 	    type = XML_REGEXP_PUNCT_FINQUOTE;
 4701: 	} else if (cur == 'o') {
 4702: 	    NEXT;
 4703: 	    /* other */
 4704: 	    type = XML_REGEXP_PUNCT_OTHERS;
 4705: 	} else {
 4706: 	    /* all punctuation */
 4707: 	    type = XML_REGEXP_PUNCT;
 4708: 	}
 4709:     } else if (cur == 'Z') {
 4710: 	NEXT;
 4711: 	cur = CUR;
 4712: 	if (cur == 's') {
 4713: 	    NEXT;
 4714: 	    /* space */
 4715: 	    type = XML_REGEXP_SEPAR_SPACE;
 4716: 	} else if (cur == 'l') {
 4717: 	    NEXT;
 4718: 	    /* line */
 4719: 	    type = XML_REGEXP_SEPAR_LINE;
 4720: 	} else if (cur == 'p') {
 4721: 	    NEXT;
 4722: 	    /* paragraph */
 4723: 	    type = XML_REGEXP_SEPAR_PARA;
 4724: 	} else {
 4725: 	    /* all separators */
 4726: 	    type = XML_REGEXP_SEPAR;
 4727: 	}
 4728:     } else if (cur == 'S') {
 4729: 	NEXT;
 4730: 	cur = CUR;
 4731: 	if (cur == 'm') {
 4732: 	    NEXT;
 4733: 	    type = XML_REGEXP_SYMBOL_MATH;
 4734: 	    /* math */
 4735: 	} else if (cur == 'c') {
 4736: 	    NEXT;
 4737: 	    type = XML_REGEXP_SYMBOL_CURRENCY;
 4738: 	    /* currency */
 4739: 	} else if (cur == 'k') {
 4740: 	    NEXT;
 4741: 	    type = XML_REGEXP_SYMBOL_MODIFIER;
 4742: 	    /* modifiers */
 4743: 	} else if (cur == 'o') {
 4744: 	    NEXT;
 4745: 	    type = XML_REGEXP_SYMBOL_OTHERS;
 4746: 	    /* other */
 4747: 	} else {
 4748: 	    /* all symbols */
 4749: 	    type = XML_REGEXP_SYMBOL;
 4750: 	}
 4751:     } else if (cur == 'C') {
 4752: 	NEXT;
 4753: 	cur = CUR;
 4754: 	if (cur == 'c') {
 4755: 	    NEXT;
 4756: 	    /* control */
 4757: 	    type = XML_REGEXP_OTHER_CONTROL;
 4758: 	} else if (cur == 'f') {
 4759: 	    NEXT;
 4760: 	    /* format */
 4761: 	    type = XML_REGEXP_OTHER_FORMAT;
 4762: 	} else if (cur == 'o') {
 4763: 	    NEXT;
 4764: 	    /* private use */
 4765: 	    type = XML_REGEXP_OTHER_PRIVATE;
 4766: 	} else if (cur == 'n') {
 4767: 	    NEXT;
 4768: 	    /* not assigned */
 4769: 	    type = XML_REGEXP_OTHER_NA;
 4770: 	} else {
 4771: 	    /* all others */
 4772: 	    type = XML_REGEXP_OTHER;
 4773: 	}
 4774:     } else if (cur == 'I') {
 4775: 	const xmlChar *start;
 4776: 	NEXT;
 4777: 	cur = CUR;
 4778: 	if (cur != 's') {
 4779: 	    ERROR("IsXXXX expected");
 4780: 	    return;
 4781: 	}
 4782: 	NEXT;
 4783: 	start = ctxt->cur;
 4784: 	cur = CUR;
 4785: 	if (((cur >= 'a') && (cur <= 'z')) || 
 4786: 	    ((cur >= 'A') && (cur <= 'Z')) || 
 4787: 	    ((cur >= '0') && (cur <= '9')) || 
 4788: 	    (cur == 0x2D)) {
 4789: 	    NEXT;
 4790: 	    cur = CUR;
 4791: 	    while (((cur >= 'a') && (cur <= 'z')) || 
 4792: 		((cur >= 'A') && (cur <= 'Z')) || 
 4793: 		((cur >= '0') && (cur <= '9')) || 
 4794: 		(cur == 0x2D)) {
 4795: 		NEXT;
 4796: 		cur = CUR;
 4797: 	    }
 4798: 	}
 4799: 	type = XML_REGEXP_BLOCK_NAME;
 4800: 	blockName = xmlStrndup(start, ctxt->cur - start);
 4801:     } else {
 4802: 	ERROR("Unknown char property");
 4803: 	return;
 4804:     }
 4805:     if (ctxt->atom == NULL) {
 4806: 	ctxt->atom = xmlRegNewAtom(ctxt, type);
 4807: 	if (ctxt->atom != NULL)
 4808: 	    ctxt->atom->valuep = blockName;
 4809:     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4810:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4811: 		           type, 0, 0, blockName);
 4812:     }
 4813: }
 4814: 
 4815: /**
 4816:  * xmlFAParseCharClassEsc:
 4817:  * @ctxt:  a regexp parser context
 4818:  *
 4819:  * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
 4820:  * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
 4821:  * [25] catEsc   ::=   '\p{' charProp '}'
 4822:  * [26] complEsc ::=   '\P{' charProp '}'
 4823:  * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
 4824:  */
 4825: static void
 4826: xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 4827:     int cur;
 4828: 
 4829:     if (CUR == '.') {
 4830: 	if (ctxt->atom == NULL) {
 4831: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
 4832: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4833: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4834: 			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
 4835: 	}
 4836: 	NEXT;
 4837: 	return;
 4838:     }
 4839:     if (CUR != '\\') {
 4840: 	ERROR("Escaped sequence: expecting \\");
 4841: 	return;
 4842:     }
 4843:     NEXT;
 4844:     cur = CUR;
 4845:     if (cur == 'p') {
 4846: 	NEXT;
 4847: 	if (CUR != '{') {
 4848: 	    ERROR("Expecting '{'");
 4849: 	    return;
 4850: 	}
 4851: 	NEXT;
 4852: 	xmlFAParseCharProp(ctxt);
 4853: 	if (CUR != '}') {
 4854: 	    ERROR("Expecting '}'");
 4855: 	    return;
 4856: 	}
 4857: 	NEXT;
 4858:     } else if (cur == 'P') {
 4859: 	NEXT;
 4860: 	if (CUR != '{') {
 4861: 	    ERROR("Expecting '{'");
 4862: 	    return;
 4863: 	}
 4864: 	NEXT;
 4865: 	xmlFAParseCharProp(ctxt);
 4866: 	ctxt->atom->neg = 1;
 4867: 	if (CUR != '}') {
 4868: 	    ERROR("Expecting '}'");
 4869: 	    return;
 4870: 	}
 4871: 	NEXT;
 4872:     } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
 4873: 	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
 4874: 	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
 4875: 	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
 4876: 	(cur == 0x5E)) {
 4877: 	if (ctxt->atom == NULL) {
 4878: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 4879: 	    if (ctxt->atom != NULL) {
 4880: 	        switch (cur) {
 4881: 		    case 'n':
 4882: 		        ctxt->atom->codepoint = '\n';
 4883: 			break;
 4884: 		    case 'r':
 4885: 		        ctxt->atom->codepoint = '\r';
 4886: 			break;
 4887: 		    case 't':
 4888: 		        ctxt->atom->codepoint = '\t';
 4889: 			break;
 4890: 		    default:
 4891: 			ctxt->atom->codepoint = cur;
 4892: 		}
 4893: 	    }
 4894: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4895:             switch (cur) {
 4896:                 case 'n':
 4897:                     cur = '\n';
 4898:                     break;
 4899:                 case 'r':
 4900:                     cur = '\r';
 4901:                     break;
 4902:                 case 't':
 4903:                     cur = '\t';
 4904:                     break;
 4905:             }
 4906: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4907: 			       XML_REGEXP_CHARVAL, cur, cur, NULL);
 4908: 	}
 4909: 	NEXT;
 4910:     } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
 4911: 	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
 4912: 	(cur == 'w') || (cur == 'W')) {
 4913: 	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
 4914: 
 4915: 	switch (cur) {
 4916: 	    case 's': 
 4917: 		type = XML_REGEXP_ANYSPACE;
 4918: 		break;
 4919: 	    case 'S': 
 4920: 		type = XML_REGEXP_NOTSPACE;
 4921: 		break;
 4922: 	    case 'i': 
 4923: 		type = XML_REGEXP_INITNAME;
 4924: 		break;
 4925: 	    case 'I': 
 4926: 		type = XML_REGEXP_NOTINITNAME;
 4927: 		break;
 4928: 	    case 'c': 
 4929: 		type = XML_REGEXP_NAMECHAR;
 4930: 		break;
 4931: 	    case 'C': 
 4932: 		type = XML_REGEXP_NOTNAMECHAR;
 4933: 		break;
 4934: 	    case 'd': 
 4935: 		type = XML_REGEXP_DECIMAL;
 4936: 		break;
 4937: 	    case 'D': 
 4938: 		type = XML_REGEXP_NOTDECIMAL;
 4939: 		break;
 4940: 	    case 'w': 
 4941: 		type = XML_REGEXP_REALCHAR;
 4942: 		break;
 4943: 	    case 'W': 
 4944: 		type = XML_REGEXP_NOTREALCHAR;
 4945: 		break;
 4946: 	}
 4947: 	NEXT;
 4948: 	if (ctxt->atom == NULL) {
 4949: 	    ctxt->atom = xmlRegNewAtom(ctxt, type);
 4950: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4951: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4952: 			       type, 0, 0, NULL);
 4953: 	}
 4954:     } else {
 4955: 	ERROR("Wrong escape sequence, misuse of character '\\'");
 4956:     }
 4957: }
 4958: 
 4959: /**
 4960:  * xmlFAParseCharRange:
 4961:  * @ctxt:  a regexp parser context
 4962:  *
 4963:  * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
 4964:  * [18]   seRange   ::=   charOrEsc '-' charOrEsc
 4965:  * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
 4966:  * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
 4967:  * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
 4968:  */
 4969: static void
 4970: xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 4971:     int cur, len;
 4972:     int start = -1;
 4973:     int end = -1;
 4974: 
 4975:     if (CUR == '\0') {
 4976:         ERROR("Expecting ']'");
 4977: 	return;
 4978:     }
 4979: 
 4980:     cur = CUR;
 4981:     if (cur == '\\') {
 4982: 	NEXT;
 4983: 	cur = CUR;
 4984: 	switch (cur) {
 4985: 	    case 'n': start = 0xA; break;
 4986: 	    case 'r': start = 0xD; break;
 4987: 	    case 't': start = 0x9; break;
 4988: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 4989: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 4990: 	    case '[': case ']':
 4991: 		start = cur; break;
 4992: 	    default:
 4993: 		ERROR("Invalid escape value");
 4994: 		return;
 4995: 	}
 4996: 	end = start;
 4997:         len = 1;
 4998:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 4999:         end = start = CUR_SCHAR(ctxt->cur, len);
 5000:     } else {
 5001: 	ERROR("Expecting a char range");
 5002: 	return;
 5003:     }
 5004:     /*
 5005:      * Since we are "inside" a range, we can assume ctxt->cur is past
 5006:      * the start of ctxt->string, and PREV should be safe
 5007:      */
 5008:     if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
 5009: 	NEXTL(len);
 5010: 	return;
 5011:     }
 5012:     NEXTL(len);
 5013:     cur = CUR;
 5014:     if ((cur != '-') || (NXT(1) == ']')) {
 5015:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5016: 		              XML_REGEXP_CHARVAL, start, end, NULL);
 5017: 	return;
 5018:     }
 5019:     NEXT;
 5020:     cur = CUR;
 5021:     if (cur == '\\') {
 5022: 	NEXT;
 5023: 	cur = CUR;
 5024: 	switch (cur) {
 5025: 	    case 'n': end = 0xA; break;
 5026: 	    case 'r': end = 0xD; break;
 5027: 	    case 't': end = 0x9; break;
 5028: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 5029: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 5030: 	    case '[': case ']':
 5031: 		end = cur; break;
 5032: 	    default:
 5033: 		ERROR("Invalid escape value");
 5034: 		return;
 5035: 	}
 5036:         len = 1;
 5037:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 5038:         end = CUR_SCHAR(ctxt->cur, len);
 5039:     } else {
 5040: 	ERROR("Expecting the end of a char range");
 5041: 	return;
 5042:     }
 5043:     NEXTL(len);
 5044:     /* TODO check that the values are acceptable character ranges for XML */
 5045:     if (end < start) {
 5046: 	ERROR("End of range is before start of range");
 5047:     } else {
 5048:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5049: 		           XML_REGEXP_CHARVAL, start, end, NULL);
 5050:     }
 5051:     return;
 5052: }
 5053: 
 5054: /**
 5055:  * xmlFAParsePosCharGroup:
 5056:  * @ctxt:  a regexp parser context
 5057:  *
 5058:  * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
 5059:  */
 5060: static void
 5061: xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
 5062:     do {
 5063: 	if (CUR == '\\') {
 5064: 	    xmlFAParseCharClassEsc(ctxt);
 5065: 	} else {
 5066: 	    xmlFAParseCharRange(ctxt);
 5067: 	}
 5068:     } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
 5069:              (CUR != 0) && (ctxt->error == 0));
 5070: }
 5071: 
 5072: /**
 5073:  * xmlFAParseCharGroup:
 5074:  * @ctxt:  a regexp parser context
 5075:  *
 5076:  * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
 5077:  * [15]   negCharGroup ::= '^' posCharGroup
 5078:  * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
 5079:  * [12]   charClassExpr ::= '[' charGroup ']'
 5080:  */
 5081: static void
 5082: xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
 5083:     int n = ctxt->neg;
 5084:     while ((CUR != ']') && (ctxt->error == 0)) {
 5085: 	if (CUR == '^') {
 5086: 	    int neg = ctxt->neg;
 5087: 
 5088: 	    NEXT;
 5089: 	    ctxt->neg = !ctxt->neg;
 5090: 	    xmlFAParsePosCharGroup(ctxt);
 5091: 	    ctxt->neg = neg;
 5092: 	} else if ((CUR == '-') && (NXT(1) == '[')) {
 5093: 	    int neg = ctxt->neg;
 5094: 	    ctxt->neg = 2;
 5095: 	    NEXT;	/* eat the '-' */
 5096: 	    NEXT;	/* eat the '[' */
 5097: 	    xmlFAParseCharGroup(ctxt);
 5098: 	    if (CUR == ']') {
 5099: 		NEXT;
 5100: 	    } else {
 5101: 		ERROR("charClassExpr: ']' expected");
 5102: 		break;
 5103: 	    }
 5104: 	    ctxt->neg = neg;
 5105: 	    break;
 5106: 	} else if (CUR != ']') {
 5107: 	    xmlFAParsePosCharGroup(ctxt);
 5108: 	}
 5109:     }
 5110:     ctxt->neg = n;
 5111: }
 5112: 
 5113: /**
 5114:  * xmlFAParseCharClass:
 5115:  * @ctxt:  a regexp parser context
 5116:  *
 5117:  * [11]   charClass   ::=     charClassEsc | charClassExpr
 5118:  * [12]   charClassExpr   ::=   '[' charGroup ']'
 5119:  */
 5120: static void
 5121: xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
 5122:     if (CUR == '[') {
 5123: 	NEXT;
 5124: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
 5125: 	if (ctxt->atom == NULL)
 5126: 	    return;
 5127: 	xmlFAParseCharGroup(ctxt);
 5128: 	if (CUR == ']') {
 5129: 	    NEXT;
 5130: 	} else {
 5131: 	    ERROR("xmlFAParseCharClass: ']' expected");
 5132: 	}
 5133:     } else {
 5134: 	xmlFAParseCharClassEsc(ctxt);
 5135:     }
 5136: }
 5137: 
 5138: /**
 5139:  * xmlFAParseQuantExact:
 5140:  * @ctxt:  a regexp parser context
 5141:  *
 5142:  * [8]   QuantExact   ::=   [0-9]+
 5143:  *
 5144:  * Returns 0 if success or -1 in case of error
 5145:  */
 5146: static int
 5147: xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
 5148:     int ret = 0;
 5149:     int ok = 0;
 5150: 
 5151:     while ((CUR >= '0') && (CUR <= '9')) {
 5152: 	ret = ret * 10 + (CUR - '0');
 5153: 	ok = 1;
 5154: 	NEXT;
 5155:     }
 5156:     if (ok != 1) {
 5157: 	return(-1);
 5158:     }
 5159:     return(ret);
 5160: }
 5161: 
 5162: /**
 5163:  * xmlFAParseQuantifier:
 5164:  * @ctxt:  a regexp parser context
 5165:  *
 5166:  * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
 5167:  * [5]   quantity   ::=   quantRange | quantMin | QuantExact
 5168:  * [6]   quantRange   ::=   QuantExact ',' QuantExact
 5169:  * [7]   quantMin   ::=   QuantExact ','
 5170:  * [8]   QuantExact   ::=   [0-9]+
 5171:  */
 5172: static int
 5173: xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
 5174:     int cur;
 5175: 
 5176:     cur = CUR;
 5177:     if ((cur == '?') || (cur == '*') || (cur == '+')) {
 5178: 	if (ctxt->atom != NULL) {
 5179: 	    if (cur == '?')
 5180: 		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
 5181: 	    else if (cur == '*')
 5182: 		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
 5183: 	    else if (cur == '+')
 5184: 		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
 5185: 	}
 5186: 	NEXT;
 5187: 	return(1);
 5188:     }
 5189:     if (cur == '{') {
 5190: 	int min = 0, max = 0;
 5191: 
 5192: 	NEXT;
 5193: 	cur = xmlFAParseQuantExact(ctxt);
 5194: 	if (cur >= 0)
 5195: 	    min = cur;
 5196: 	if (CUR == ',') {
 5197: 	    NEXT;
 5198: 	    if (CUR == '}')
 5199: 	        max = INT_MAX;
 5200: 	    else {
 5201: 	        cur = xmlFAParseQuantExact(ctxt);
 5202: 	        if (cur >= 0)
 5203: 		    max = cur;
 5204: 		else {
 5205: 		    ERROR("Improper quantifier");
 5206: 		}
 5207: 	    }
 5208: 	}
 5209: 	if (CUR == '}') {
 5210: 	    NEXT;
 5211: 	} else {
 5212: 	    ERROR("Unterminated quantifier");
 5213: 	}
 5214: 	if (max == 0)
 5215: 	    max = min;
 5216: 	if (ctxt->atom != NULL) {
 5217: 	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
 5218: 	    ctxt->atom->min = min;
 5219: 	    ctxt->atom->max = max;
 5220: 	}
 5221: 	return(1);
 5222:     }
 5223:     return(0);
 5224: }
 5225: 
 5226: /**
 5227:  * xmlFAParseAtom:
 5228:  * @ctxt:  a regexp parser context
 5229:  *
 5230:  * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
 5231:  */
 5232: static int
 5233: xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
 5234:     int codepoint, len;
 5235: 
 5236:     codepoint = xmlFAIsChar(ctxt);
 5237:     if (codepoint > 0) {
 5238: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 5239: 	if (ctxt->atom == NULL)
 5240: 	    return(-1);
 5241: 	codepoint = CUR_SCHAR(ctxt->cur, len);
 5242: 	ctxt->atom->codepoint = codepoint;
 5243: 	NEXTL(len);
 5244: 	return(1);
 5245:     } else if (CUR == '|') {
 5246: 	return(0);
 5247:     } else if (CUR == 0) {
 5248: 	return(0);
 5249:     } else if (CUR == ')') {
 5250: 	return(0);
 5251:     } else if (CUR == '(') {
 5252: 	xmlRegStatePtr start, oldend, start0;
 5253: 
 5254: 	NEXT;
 5255: 	/*
 5256: 	 * this extra Epsilon transition is needed if we count with 0 allowed
 5257: 	 * unfortunately this can't be known at that point
 5258: 	 */
 5259: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5260: 	start0 = ctxt->state;
 5261: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5262: 	start = ctxt->state;
 5263: 	oldend = ctxt->end;
 5264: 	ctxt->end = NULL;
 5265: 	ctxt->atom = NULL;
 5266: 	xmlFAParseRegExp(ctxt, 0);
 5267: 	if (CUR == ')') {
 5268: 	    NEXT;
 5269: 	} else {
 5270: 	    ERROR("xmlFAParseAtom: expecting ')'");
 5271: 	}
 5272: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
 5273: 	if (ctxt->atom == NULL)
 5274: 	    return(-1);
 5275: 	ctxt->atom->start = start;
 5276: 	ctxt->atom->start0 = start0;
 5277: 	ctxt->atom->stop = ctxt->state;
 5278: 	ctxt->end = oldend;
 5279: 	return(1);
 5280:     } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
 5281: 	xmlFAParseCharClass(ctxt);
 5282: 	return(1);
 5283:     }
 5284:     return(0);
 5285: }
 5286: 
 5287: /**
 5288:  * xmlFAParsePiece:
 5289:  * @ctxt:  a regexp parser context
 5290:  *
 5291:  * [3]   piece   ::=   atom quantifier?
 5292:  */
 5293: static int
 5294: xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
 5295:     int ret;
 5296: 
 5297:     ctxt->atom = NULL;
 5298:     ret = xmlFAParseAtom(ctxt);
 5299:     if (ret == 0)
 5300: 	return(0);
 5301:     if (ctxt->atom == NULL) {
 5302: 	ERROR("internal: no atom generated");
 5303:     }
 5304:     xmlFAParseQuantifier(ctxt);
 5305:     return(1);
 5306: }
 5307: 
 5308: /**
 5309:  * xmlFAParseBranch:
 5310:  * @ctxt:  a regexp parser context
 5311:  * @to: optional target to the end of the branch
 5312:  *
 5313:  * @to is used to optimize by removing duplicate path in automata
 5314:  * in expressions like (a|b)(c|d)
 5315:  *
 5316:  * [2]   branch   ::=   piece*
 5317:  */
 5318: static int
 5319: xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
 5320:     xmlRegStatePtr previous;
 5321:     int ret;
 5322: 
 5323:     previous = ctxt->state;
 5324:     ret = xmlFAParsePiece(ctxt);
 5325:     if (ret != 0) {
 5326: 	if (xmlFAGenerateTransitions(ctxt, previous, 
 5327: 	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5328: 	    return(-1);
 5329: 	previous = ctxt->state;
 5330: 	ctxt->atom = NULL;
 5331:     }
 5332:     while ((ret != 0) && (ctxt->error == 0)) {
 5333: 	ret = xmlFAParsePiece(ctxt);
 5334: 	if (ret != 0) {
 5335: 	    if (xmlFAGenerateTransitions(ctxt, previous, 
 5336: 	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5337: 		    return(-1);
 5338: 	    previous = ctxt->state;
 5339: 	    ctxt->atom = NULL;
 5340: 	}
 5341:     }
 5342:     return(0);
 5343: }
 5344: 
 5345: /**
 5346:  * xmlFAParseRegExp:
 5347:  * @ctxt:  a regexp parser context
 5348:  * @top:  is this the top-level expression ?
 5349:  *
 5350:  * [1]   regExp   ::=     branch  ( '|' branch )*
 5351:  */
 5352: static void
 5353: xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
 5354:     xmlRegStatePtr start, end;
 5355: 
 5356:     /* if not top start should have been generated by an epsilon trans */
 5357:     start = ctxt->state;
 5358:     ctxt->end = NULL;
 5359:     xmlFAParseBranch(ctxt, NULL);
 5360:     if (top) {
 5361: #ifdef DEBUG_REGEXP_GRAPH
 5362: 	printf("State %d is final\n", ctxt->state->no);
 5363: #endif
 5364: 	ctxt->state->type = XML_REGEXP_FINAL_STATE;
 5365:     }
 5366:     if (CUR != '|') {
 5367: 	ctxt->end = ctxt->state;
 5368: 	return;
 5369:     }
 5370:     end = ctxt->state;
 5371:     while ((CUR == '|') && (ctxt->error == 0)) {
 5372: 	NEXT;
 5373: 	ctxt->state = start;
 5374: 	ctxt->end = NULL;
 5375: 	xmlFAParseBranch(ctxt, end);
 5376:     }
 5377:     if (!top) {
 5378: 	ctxt->state = end;
 5379: 	ctxt->end = end;
 5380:     }
 5381: }
 5382: 
 5383: /************************************************************************
 5384:  * 									*
 5385:  * 			The basic API					*
 5386:  * 									*
 5387:  ************************************************************************/
 5388: 
 5389: /**
 5390:  * xmlRegexpPrint:
 5391:  * @output: the file for the output debug
 5392:  * @regexp: the compiled regexp
 5393:  *
 5394:  * Print the content of the compiled regular expression
 5395:  */
 5396: void
 5397: xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
 5398:     int i;
 5399: 
 5400:     if (output == NULL)
 5401:         return;
 5402:     fprintf(output, " regexp: ");
 5403:     if (regexp == NULL) {
 5404: 	fprintf(output, "NULL\n");
 5405: 	return;
 5406:     }
 5407:     fprintf(output, "'%s' ", regexp->string);
 5408:     fprintf(output, "\n");
 5409:     fprintf(output, "%d atoms:\n", regexp->nbAtoms);
 5410:     for (i = 0;i < regexp->nbAtoms; i++) {
 5411: 	fprintf(output, " %02d ", i);
 5412: 	xmlRegPrintAtom(output, regexp->atoms[i]);
 5413:     }
 5414:     fprintf(output, "%d states:", regexp->nbStates);
 5415:     fprintf(output, "\n");
 5416:     for (i = 0;i < regexp->nbStates; i++) {
 5417: 	xmlRegPrintState(output, regexp->states[i]);
 5418:     }
 5419:     fprintf(output, "%d counters:\n", regexp->nbCounters);
 5420:     for (i = 0;i < regexp->nbCounters; i++) {
 5421: 	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
 5422: 		                                regexp->counters[i].max);
 5423:     }
 5424: }
 5425: 
 5426: /**
 5427:  * xmlRegexpCompile:
 5428:  * @regexp:  a regular expression string
 5429:  *
 5430:  * Parses a regular expression conforming to XML Schemas Part 2 Datatype
 5431:  * Appendix F and builds an automata suitable for testing strings against
 5432:  * that regular expression
 5433:  *
 5434:  * Returns the compiled expression or NULL in case of error
 5435:  */
 5436: xmlRegexpPtr
 5437: xmlRegexpCompile(const xmlChar *regexp) {
 5438:     xmlRegexpPtr ret;
 5439:     xmlRegParserCtxtPtr ctxt;
 5440: 
 5441:     ctxt = xmlRegNewParserCtxt(regexp);
 5442:     if (ctxt == NULL)
 5443: 	return(NULL);
 5444: 
 5445:     /* initialize the parser */
 5446:     ctxt->end = NULL;
 5447:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5448:     xmlRegStatePush(ctxt, ctxt->start);
 5449: 
 5450:     /* parse the expression building an automata */
 5451:     xmlFAParseRegExp(ctxt, 1);
 5452:     if (CUR != 0) {
 5453: 	ERROR("xmlFAParseRegExp: extra characters");
 5454:     }
 5455:     if (ctxt->error != 0) {
 5456: 	xmlRegFreeParserCtxt(ctxt);
 5457: 	return(NULL);
 5458:     }
 5459:     ctxt->end = ctxt->state;
 5460:     ctxt->start->type = XML_REGEXP_START_STATE;
 5461:     ctxt->end->type = XML_REGEXP_FINAL_STATE;
 5462: 
 5463:     /* remove the Epsilon except for counted transitions */
 5464:     xmlFAEliminateEpsilonTransitions(ctxt);
 5465: 
 5466: 
 5467:     if (ctxt->error != 0) {
 5468: 	xmlRegFreeParserCtxt(ctxt);
 5469: 	return(NULL);
 5470:     }
 5471:     ret = xmlRegEpxFromParse(ctxt);
 5472:     xmlRegFreeParserCtxt(ctxt);
 5473:     return(ret);
 5474: }
 5475: 
 5476: /**
 5477:  * xmlRegexpExec:
 5478:  * @comp:  the compiled regular expression
 5479:  * @content:  the value to check against the regular expression
 5480:  *
 5481:  * Check if the regular expression generates the value
 5482:  *
 5483:  * Returns 1 if it matches, 0 if not and a negative value in case of error
 5484:  */
 5485: int
 5486: xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
 5487:     if ((comp == NULL) || (content == NULL))
 5488: 	return(-1);
 5489:     return(xmlFARegExec(comp, content));
 5490: }
 5491: 
 5492: /**
 5493:  * xmlRegexpIsDeterminist:
 5494:  * @comp:  the compiled regular expression
 5495:  *
 5496:  * Check if the regular expression is determinist
 5497:  *
 5498:  * Returns 1 if it yes, 0 if not and a negative value in case of error
 5499:  */
 5500: int
 5501: xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
 5502:     xmlAutomataPtr am;
 5503:     int ret;
 5504: 
 5505:     if (comp == NULL)
 5506: 	return(-1);
 5507:     if (comp->determinist != -1)
 5508: 	return(comp->determinist);
 5509: 
 5510:     am = xmlNewAutomata();
 5511:     if (am->states != NULL) {
 5512: 	int i;
 5513: 
 5514: 	for (i = 0;i < am->nbStates;i++)
 5515: 	    xmlRegFreeState(am->states[i]);
 5516: 	xmlFree(am->states);
 5517:     }
 5518:     am->nbAtoms = comp->nbAtoms;
 5519:     am->atoms = comp->atoms;
 5520:     am->nbStates = comp->nbStates;
 5521:     am->states = comp->states;
 5522:     am->determinist = -1;
 5523:     am->flags = comp->flags;
 5524:     ret = xmlFAComputesDeterminism(am);
 5525:     am->atoms = NULL;
 5526:     am->states = NULL;
 5527:     xmlFreeAutomata(am);
 5528:     comp->determinist = ret;
 5529:     return(ret);
 5530: }
 5531: 
 5532: /**
 5533:  * xmlRegFreeRegexp:
 5534:  * @regexp:  the regexp
 5535:  *
 5536:  * Free a regexp
 5537:  */
 5538: void
 5539: xmlRegFreeRegexp(xmlRegexpPtr regexp) {
 5540:     int i;
 5541:     if (regexp == NULL)
 5542: 	return;
 5543: 
 5544:     if (regexp->string != NULL)
 5545: 	xmlFree(regexp->string);
 5546:     if (regexp->states != NULL) {
 5547: 	for (i = 0;i < regexp->nbStates;i++)
 5548: 	    xmlRegFreeState(regexp->states[i]);
 5549: 	xmlFree(regexp->states);
 5550:     }
 5551:     if (regexp->atoms != NULL) {
 5552: 	for (i = 0;i < regexp->nbAtoms;i++)
 5553: 	    xmlRegFreeAtom(regexp->atoms[i]);
 5554: 	xmlFree(regexp->atoms);
 5555:     }
 5556:     if (regexp->counters != NULL)
 5557: 	xmlFree(regexp->counters);
 5558:     if (regexp->compact != NULL)
 5559: 	xmlFree(regexp->compact);
 5560:     if (regexp->transdata != NULL)
 5561: 	xmlFree(regexp->transdata);
 5562:     if (regexp->stringMap != NULL) {
 5563: 	for (i = 0; i < regexp->nbstrings;i++)
 5564: 	    xmlFree(regexp->stringMap[i]);
 5565: 	xmlFree(regexp->stringMap);
 5566:     }
 5567: 
 5568:     xmlFree(regexp);
 5569: }
 5570: 
 5571: #ifdef LIBXML_AUTOMATA_ENABLED
 5572: /************************************************************************
 5573:  * 									*
 5574:  * 			The Automata interface				*
 5575:  * 									*
 5576:  ************************************************************************/
 5577: 
 5578: /**
 5579:  * xmlNewAutomata:
 5580:  *
 5581:  * Create a new automata
 5582:  *
 5583:  * Returns the new object or NULL in case of failure
 5584:  */
 5585: xmlAutomataPtr
 5586: xmlNewAutomata(void) {
 5587:     xmlAutomataPtr ctxt;
 5588: 
 5589:     ctxt = xmlRegNewParserCtxt(NULL);
 5590:     if (ctxt == NULL)
 5591: 	return(NULL);
 5592: 
 5593:     /* initialize the parser */
 5594:     ctxt->end = NULL;
 5595:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5596:     if (ctxt->start == NULL) {
 5597: 	xmlFreeAutomata(ctxt);
 5598: 	return(NULL);
 5599:     }
 5600:     ctxt->start->type = XML_REGEXP_START_STATE;
 5601:     if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
 5602:         xmlRegFreeState(ctxt->start);
 5603: 	xmlFreeAutomata(ctxt);
 5604: 	return(NULL);
 5605:     }
 5606:     ctxt->flags = 0;
 5607: 
 5608:     return(ctxt);
 5609: }
 5610: 
 5611: /**
 5612:  * xmlFreeAutomata:
 5613:  * @am: an automata
 5614:  *
 5615:  * Free an automata
 5616:  */
 5617: void
 5618: xmlFreeAutomata(xmlAutomataPtr am) {
 5619:     if (am == NULL)
 5620: 	return;
 5621:     xmlRegFreeParserCtxt(am);
 5622: }
 5623: 
 5624: /**
 5625:  * xmlAutomataSetFlags:
 5626:  * @am: an automata
 5627:  * @flags:  a set of internal flags
 5628:  *
 5629:  * Set some flags on the automata
 5630:  */
 5631: void
 5632: xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
 5633:     if (am == NULL)
 5634: 	return;
 5635:     am->flags |= flags;
 5636: }
 5637: 
 5638: /**
 5639:  * xmlAutomataGetInitState:
 5640:  * @am: an automata
 5641:  *
 5642:  * Initial state lookup
 5643:  *
 5644:  * Returns the initial state of the automata
 5645:  */
 5646: xmlAutomataStatePtr
 5647: xmlAutomataGetInitState(xmlAutomataPtr am) {
 5648:     if (am == NULL)
 5649: 	return(NULL);
 5650:     return(am->start);
 5651: }
 5652: 
 5653: /**
 5654:  * xmlAutomataSetFinalState:
 5655:  * @am: an automata
 5656:  * @state: a state in this automata
 5657:  *
 5658:  * Makes that state a final state
 5659:  *
 5660:  * Returns 0 or -1 in case of error
 5661:  */
 5662: int
 5663: xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
 5664:     if ((am == NULL) || (state == NULL))
 5665: 	return(-1);
 5666:     state->type = XML_REGEXP_FINAL_STATE;
 5667:     return(0);
 5668: }
 5669: 
 5670: /**
 5671:  * xmlAutomataNewTransition:
 5672:  * @am: an automata
 5673:  * @from: the starting point of the transition
 5674:  * @to: the target point of the transition or NULL
 5675:  * @token: the input string associated to that transition
 5676:  * @data: data passed to the callback function if the transition is activated
 5677:  *
 5678:  * If @to is NULL, this creates first a new target state in the automata
 5679:  * and then adds a transition from the @from state to the target state
 5680:  * activated by the value of @token
 5681:  *
 5682:  * Returns the target state or NULL in case of error
 5683:  */
 5684: xmlAutomataStatePtr
 5685: xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5686: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5687: 			 void *data) {
 5688:     xmlRegAtomPtr atom;
 5689: 
 5690:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5691: 	return(NULL);
 5692:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5693:     if (atom == NULL)
 5694:         return(NULL);
 5695:     atom->data = data;
 5696:     if (atom == NULL)
 5697: 	return(NULL);
 5698:     atom->valuep = xmlStrdup(token);
 5699: 
 5700:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5701:         xmlRegFreeAtom(atom);
 5702: 	return(NULL);
 5703:     }
 5704:     if (to == NULL)
 5705: 	return(am->state);
 5706:     return(to);
 5707: }
 5708: 
 5709: /**
 5710:  * xmlAutomataNewTransition2:
 5711:  * @am: an automata
 5712:  * @from: the starting point of the transition
 5713:  * @to: the target point of the transition or NULL
 5714:  * @token: the first input string associated to that transition
 5715:  * @token2: the second input string associated to that transition
 5716:  * @data: data passed to the callback function if the transition is activated
 5717:  *
 5718:  * If @to is NULL, this creates first a new target state in the automata
 5719:  * and then adds a transition from the @from state to the target state
 5720:  * activated by the value of @token
 5721:  *
 5722:  * Returns the target state or NULL in case of error
 5723:  */
 5724: xmlAutomataStatePtr
 5725: xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5726: 			  xmlAutomataStatePtr to, const xmlChar *token,
 5727: 			  const xmlChar *token2, void *data) {
 5728:     xmlRegAtomPtr atom;
 5729: 
 5730:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5731: 	return(NULL);
 5732:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5733:     if (atom == NULL)
 5734: 	return(NULL);
 5735:     atom->data = data;
 5736:     if ((token2 == NULL) || (*token2 == 0)) {
 5737: 	atom->valuep = xmlStrdup(token);
 5738:     } else {
 5739: 	int lenn, lenp;
 5740: 	xmlChar *str;
 5741: 
 5742: 	lenn = strlen((char *) token2);
 5743: 	lenp = strlen((char *) token);
 5744: 
 5745: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5746: 	if (str == NULL) {
 5747: 	    xmlRegFreeAtom(atom);
 5748: 	    return(NULL);
 5749: 	}
 5750: 	memcpy(&str[0], token, lenp);
 5751: 	str[lenp] = '|';
 5752: 	memcpy(&str[lenp + 1], token2, lenn);
 5753: 	str[lenn + lenp + 1] = 0;
 5754: 
 5755: 	atom->valuep = str;
 5756:     }
 5757: 
 5758:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5759:         xmlRegFreeAtom(atom);
 5760: 	return(NULL);
 5761:     }
 5762:     if (to == NULL)
 5763: 	return(am->state);
 5764:     return(to);
 5765: }
 5766: 
 5767: /**
 5768:  * xmlAutomataNewNegTrans:
 5769:  * @am: an automata
 5770:  * @from: the starting point of the transition
 5771:  * @to: the target point of the transition or NULL
 5772:  * @token: the first input string associated to that transition
 5773:  * @token2: the second input string associated to that transition
 5774:  * @data: data passed to the callback function if the transition is activated
 5775:  *
 5776:  * If @to is NULL, this creates first a new target state in the automata
 5777:  * and then adds a transition from the @from state to the target state
 5778:  * activated by any value except (@token,@token2)
 5779:  * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
 5780:  # the semantic of XSD ##other
 5781:  *
 5782:  * Returns the target state or NULL in case of error
 5783:  */
 5784: xmlAutomataStatePtr
 5785: xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5786: 		       xmlAutomataStatePtr to, const xmlChar *token,
 5787: 		       const xmlChar *token2, void *data) {
 5788:     xmlRegAtomPtr atom;
 5789:     xmlChar err_msg[200];
 5790: 
 5791:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5792: 	return(NULL);
 5793:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5794:     if (atom == NULL)
 5795: 	return(NULL);
 5796:     atom->data = data;
 5797:     atom->neg = 1;
 5798:     if ((token2 == NULL) || (*token2 == 0)) {
 5799: 	atom->valuep = xmlStrdup(token);
 5800:     } else {
 5801: 	int lenn, lenp;
 5802: 	xmlChar *str;
 5803: 
 5804: 	lenn = strlen((char *) token2);
 5805: 	lenp = strlen((char *) token);
 5806: 
 5807: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5808: 	if (str == NULL) {
 5809: 	    xmlRegFreeAtom(atom);
 5810: 	    return(NULL);
 5811: 	}
 5812: 	memcpy(&str[0], token, lenp);
 5813: 	str[lenp] = '|';
 5814: 	memcpy(&str[lenp + 1], token2, lenn);
 5815: 	str[lenn + lenp + 1] = 0;
 5816: 
 5817: 	atom->valuep = str;
 5818:     }
 5819:     snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
 5820:     err_msg[199] = 0;
 5821:     atom->valuep2 = xmlStrdup(err_msg);
 5822: 
 5823:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5824:         xmlRegFreeAtom(atom);
 5825: 	return(NULL);
 5826:     }
 5827:     am->negs++;
 5828:     if (to == NULL)
 5829: 	return(am->state);
 5830:     return(to);
 5831: }
 5832: 
 5833: /**
 5834:  * xmlAutomataNewCountTrans2:
 5835:  * @am: an automata
 5836:  * @from: the starting point of the transition
 5837:  * @to: the target point of the transition or NULL
 5838:  * @token: the input string associated to that transition
 5839:  * @token2: the second input string associated to that transition
 5840:  * @min:  the minimum successive occurences of token
 5841:  * @max:  the maximum successive occurences of token
 5842:  * @data:  data associated to the transition
 5843:  *
 5844:  * If @to is NULL, this creates first a new target state in the automata
 5845:  * and then adds a transition from the @from state to the target state
 5846:  * activated by a succession of input of value @token and @token2 and 
 5847:  * whose number is between @min and @max
 5848:  *
 5849:  * Returns the target state or NULL in case of error
 5850:  */
 5851: xmlAutomataStatePtr
 5852: xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5853: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5854: 			 const xmlChar *token2,
 5855: 			 int min, int max, void *data) {
 5856:     xmlRegAtomPtr atom;
 5857:     int counter;
 5858: 
 5859:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5860: 	return(NULL);
 5861:     if (min < 0)
 5862: 	return(NULL);
 5863:     if ((max < min) || (max < 1))
 5864: 	return(NULL);
 5865:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5866:     if (atom == NULL)
 5867: 	return(NULL);
 5868:     if ((token2 == NULL) || (*token2 == 0)) {
 5869: 	atom->valuep = xmlStrdup(token);
 5870:     } else {
 5871: 	int lenn, lenp;
 5872: 	xmlChar *str;
 5873: 
 5874: 	lenn = strlen((char *) token2);
 5875: 	lenp = strlen((char *) token);
 5876: 
 5877: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5878: 	if (str == NULL) {
 5879: 	    xmlRegFreeAtom(atom);
 5880: 	    return(NULL);
 5881: 	}
 5882: 	memcpy(&str[0], token, lenp);
 5883: 	str[lenp] = '|';
 5884: 	memcpy(&str[lenp + 1], token2, lenn);
 5885: 	str[lenn + lenp + 1] = 0;
 5886: 
 5887: 	atom->valuep = str;
 5888:     }
 5889:     atom->data = data;
 5890:     if (min == 0)
 5891: 	atom->min = 1;
 5892:     else
 5893: 	atom->min = min;
 5894:     atom->max = max;
 5895: 
 5896:     /*
 5897:      * associate a counter to the transition.
 5898:      */
 5899:     counter = xmlRegGetCounter(am);
 5900:     am->counters[counter].min = min;
 5901:     am->counters[counter].max = max;
 5902: 
 5903:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5904:     if (to == NULL) {
 5905:         to = xmlRegNewState(am);
 5906: 	xmlRegStatePush(am, to);
 5907:     }
 5908:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5909:     xmlRegAtomPush(am, atom);
 5910:     am->state = to;
 5911: 
 5912:     if (to == NULL)
 5913: 	to = am->state;
 5914:     if (to == NULL)
 5915: 	return(NULL);
 5916:     if (min == 0)
 5917: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5918:     return(to);
 5919: }
 5920: 
 5921: /**
 5922:  * xmlAutomataNewCountTrans:
 5923:  * @am: an automata
 5924:  * @from: the starting point of the transition
 5925:  * @to: the target point of the transition or NULL
 5926:  * @token: the input string associated to that transition
 5927:  * @min:  the minimum successive occurences of token
 5928:  * @max:  the maximum successive occurences of token
 5929:  * @data:  data associated to the transition
 5930:  *
 5931:  * If @to is NULL, this creates first a new target state in the automata
 5932:  * and then adds a transition from the @from state to the target state
 5933:  * activated by a succession of input of value @token and whose number
 5934:  * is between @min and @max
 5935:  *
 5936:  * Returns the target state or NULL in case of error
 5937:  */
 5938: xmlAutomataStatePtr
 5939: xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5940: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5941: 			 int min, int max, void *data) {
 5942:     xmlRegAtomPtr atom;
 5943:     int counter;
 5944: 
 5945:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5946: 	return(NULL);
 5947:     if (min < 0)
 5948: 	return(NULL);
 5949:     if ((max < min) || (max < 1))
 5950: 	return(NULL);
 5951:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5952:     if (atom == NULL)
 5953: 	return(NULL);
 5954:     atom->valuep = xmlStrdup(token);
 5955:     atom->data = data;
 5956:     if (min == 0)
 5957: 	atom->min = 1;
 5958:     else
 5959: 	atom->min = min;
 5960:     atom->max = max;
 5961: 
 5962:     /*
 5963:      * associate a counter to the transition.
 5964:      */
 5965:     counter = xmlRegGetCounter(am);
 5966:     am->counters[counter].min = min;
 5967:     am->counters[counter].max = max;
 5968: 
 5969:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5970:     if (to == NULL) {
 5971:         to = xmlRegNewState(am);
 5972: 	xmlRegStatePush(am, to);
 5973:     }
 5974:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5975:     xmlRegAtomPush(am, atom);
 5976:     am->state = to;
 5977: 
 5978:     if (to == NULL)
 5979: 	to = am->state;
 5980:     if (to == NULL)
 5981: 	return(NULL);
 5982:     if (min == 0)
 5983: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5984:     return(to);
 5985: }
 5986: 
 5987: /**
 5988:  * xmlAutomataNewOnceTrans2:
 5989:  * @am: an automata
 5990:  * @from: the starting point of the transition
 5991:  * @to: the target point of the transition or NULL
 5992:  * @token: the input string associated to that transition
 5993:  * @token2: the second input string associated to that transition
 5994:  * @min:  the minimum successive occurences of token
 5995:  * @max:  the maximum successive occurences of token
 5996:  * @data:  data associated to the transition
 5997:  *
 5998:  * If @to is NULL, this creates first a new target state in the automata
 5999:  * and then adds a transition from the @from state to the target state
 6000:  * activated by a succession of input of value @token and @token2 and whose 
 6001:  * number is between @min and @max, moreover that transition can only be 
 6002:  * crossed once.
 6003:  *
 6004:  * Returns the target state or NULL in case of error
 6005:  */
 6006: xmlAutomataStatePtr
 6007: xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6008: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6009: 			 const xmlChar *token2,
 6010: 			 int min, int max, void *data) {
 6011:     xmlRegAtomPtr atom;
 6012:     int counter;
 6013: 
 6014:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6015: 	return(NULL);
 6016:     if (min < 1)
 6017: 	return(NULL);
 6018:     if ((max < min) || (max < 1))
 6019: 	return(NULL);
 6020:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6021:     if (atom == NULL)
 6022: 	return(NULL);
 6023:     if ((token2 == NULL) || (*token2 == 0)) {
 6024: 	atom->valuep = xmlStrdup(token);
 6025:     } else {
 6026: 	int lenn, lenp;
 6027: 	xmlChar *str;
 6028: 
 6029: 	lenn = strlen((char *) token2);
 6030: 	lenp = strlen((char *) token);
 6031: 
 6032: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 6033: 	if (str == NULL) {
 6034: 	    xmlRegFreeAtom(atom);
 6035: 	    return(NULL);
 6036: 	}
 6037: 	memcpy(&str[0], token, lenp);
 6038: 	str[lenp] = '|';
 6039: 	memcpy(&str[lenp + 1], token2, lenn);
 6040: 	str[lenn + lenp + 1] = 0;
 6041: 
 6042: 	atom->valuep = str;
 6043:     }    
 6044:     atom->data = data;
 6045:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6046:     atom->min = min;
 6047:     atom->max = max;
 6048:     /*
 6049:      * associate a counter to the transition.
 6050:      */
 6051:     counter = xmlRegGetCounter(am);
 6052:     am->counters[counter].min = 1;
 6053:     am->counters[counter].max = 1;
 6054: 
 6055:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6056:     if (to == NULL) {
 6057: 	to = xmlRegNewState(am);
 6058: 	xmlRegStatePush(am, to);
 6059:     }
 6060:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6061:     xmlRegAtomPush(am, atom);
 6062:     am->state = to;
 6063:     return(to);
 6064: }
 6065: 
 6066:     
 6067: 
 6068: /**
 6069:  * xmlAutomataNewOnceTrans:
 6070:  * @am: an automata
 6071:  * @from: the starting point of the transition
 6072:  * @to: the target point of the transition or NULL
 6073:  * @token: the input string associated to that transition
 6074:  * @min:  the minimum successive occurences of token
 6075:  * @max:  the maximum successive occurences of token
 6076:  * @data:  data associated to the transition
 6077:  *
 6078:  * If @to is NULL, this creates first a new target state in the automata
 6079:  * and then adds a transition from the @from state to the target state
 6080:  * activated by a succession of input of value @token and whose number
 6081:  * is between @min and @max, moreover that transition can only be crossed
 6082:  * once.
 6083:  *
 6084:  * Returns the target state or NULL in case of error
 6085:  */
 6086: xmlAutomataStatePtr
 6087: xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6088: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6089: 			 int min, int max, void *data) {
 6090:     xmlRegAtomPtr atom;
 6091:     int counter;
 6092: 
 6093:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6094: 	return(NULL);
 6095:     if (min < 1)
 6096: 	return(NULL);
 6097:     if ((max < min) || (max < 1))
 6098: 	return(NULL);
 6099:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6100:     if (atom == NULL)
 6101: 	return(NULL);
 6102:     atom->valuep = xmlStrdup(token);
 6103:     atom->data = data;
 6104:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6105:     atom->min = min;
 6106:     atom->max = max;
 6107:     /*
 6108:      * associate a counter to the transition.
 6109:      */
 6110:     counter = xmlRegGetCounter(am);
 6111:     am->counters[counter].min = 1;
 6112:     am->counters[counter].max = 1;
 6113: 
 6114:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6115:     if (to == NULL) {
 6116: 	to = xmlRegNewState(am);
 6117: 	xmlRegStatePush(am, to);
 6118:     }
 6119:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6120:     xmlRegAtomPush(am, atom);
 6121:     am->state = to;
 6122:     return(to);
 6123: }
 6124: 
 6125: /**
 6126:  * xmlAutomataNewState:
 6127:  * @am: an automata
 6128:  *
 6129:  * Create a new disconnected state in the automata
 6130:  *
 6131:  * Returns the new state or NULL in case of error
 6132:  */
 6133: xmlAutomataStatePtr
 6134: xmlAutomataNewState(xmlAutomataPtr am) {
 6135:     xmlAutomataStatePtr to; 
 6136: 
 6137:     if (am == NULL)
 6138: 	return(NULL);
 6139:     to = xmlRegNewState(am);
 6140:     xmlRegStatePush(am, to);
 6141:     return(to);
 6142: }
 6143: 
 6144: /**
 6145:  * xmlAutomataNewEpsilon:
 6146:  * @am: an automata
 6147:  * @from: the starting point of the transition
 6148:  * @to: the target point of the transition or NULL
 6149:  *
 6150:  * If @to is NULL, this creates first a new target state in the automata
 6151:  * and then adds an epsilon transition from the @from state to the
 6152:  * target state
 6153:  *
 6154:  * Returns the target state or NULL in case of error
 6155:  */
 6156: xmlAutomataStatePtr
 6157: xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6158: 		      xmlAutomataStatePtr to) {
 6159:     if ((am == NULL) || (from == NULL))
 6160: 	return(NULL);
 6161:     xmlFAGenerateEpsilonTransition(am, from, to);
 6162:     if (to == NULL)
 6163: 	return(am->state);
 6164:     return(to);
 6165: }
 6166: 
 6167: /**
 6168:  * xmlAutomataNewAllTrans:
 6169:  * @am: an automata
 6170:  * @from: the starting point of the transition
 6171:  * @to: the target point of the transition or NULL
 6172:  * @lax: allow to transition if not all all transitions have been activated
 6173:  *
 6174:  * If @to is NULL, this creates first a new target state in the automata
 6175:  * and then adds a an ALL transition from the @from state to the
 6176:  * target state. That transition is an epsilon transition allowed only when
 6177:  * all transitions from the @from node have been activated.
 6178:  *
 6179:  * Returns the target state or NULL in case of error
 6180:  */
 6181: xmlAutomataStatePtr
 6182: xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6183: 		       xmlAutomataStatePtr to, int lax) {
 6184:     if ((am == NULL) || (from == NULL))
 6185: 	return(NULL);
 6186:     xmlFAGenerateAllTransition(am, from, to, lax);
 6187:     if (to == NULL)
 6188: 	return(am->state);
 6189:     return(to);
 6190: }
 6191: 
 6192: /**
 6193:  * xmlAutomataNewCounter:
 6194:  * @am: an automata
 6195:  * @min:  the minimal value on the counter
 6196:  * @max:  the maximal value on the counter
 6197:  *
 6198:  * Create a new counter
 6199:  *
 6200:  * Returns the counter number or -1 in case of error
 6201:  */
 6202: int		
 6203: xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
 6204:     int ret;
 6205: 
 6206:     if (am == NULL)
 6207: 	return(-1);
 6208: 
 6209:     ret = xmlRegGetCounter(am);
 6210:     if (ret < 0)
 6211: 	return(-1);
 6212:     am->counters[ret].min = min;
 6213:     am->counters[ret].max = max;
 6214:     return(ret);
 6215: }
 6216: 
 6217: /**
 6218:  * xmlAutomataNewCountedTrans:
 6219:  * @am: an automata
 6220:  * @from: the starting point of the transition
 6221:  * @to: the target point of the transition or NULL
 6222:  * @counter: the counter associated to that transition
 6223:  *
 6224:  * If @to is NULL, this creates first a new target state in the automata
 6225:  * and then adds an epsilon transition from the @from state to the target state
 6226:  * which will increment the counter provided
 6227:  *
 6228:  * Returns the target state or NULL in case of error
 6229:  */
 6230: xmlAutomataStatePtr
 6231: xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6232: 		xmlAutomataStatePtr to, int counter) {
 6233:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6234: 	return(NULL);
 6235:     xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
 6236:     if (to == NULL)
 6237: 	return(am->state);
 6238:     return(to);
 6239: }
 6240: 
 6241: /**
 6242:  * xmlAutomataNewCounterTrans:
 6243:  * @am: an automata
 6244:  * @from: the starting point of the transition
 6245:  * @to: the target point of the transition or NULL
 6246:  * @counter: the counter associated to that transition
 6247:  *
 6248:  * If @to is NULL, this creates first a new target state in the automata
 6249:  * and then adds an epsilon transition from the @from state to the target state
 6250:  * which will be allowed only if the counter is within the right range.
 6251:  *
 6252:  * Returns the target state or NULL in case of error
 6253:  */
 6254: xmlAutomataStatePtr
 6255: xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6256: 		xmlAutomataStatePtr to, int counter) {
 6257:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6258: 	return(NULL);
 6259:     xmlFAGenerateCountedTransition(am, from, to, counter);
 6260:     if (to == NULL)
 6261: 	return(am->state);
 6262:     return(to);
 6263: }
 6264: 
 6265: /**
 6266:  * xmlAutomataCompile:
 6267:  * @am: an automata
 6268:  *
 6269:  * Compile the automata into a Reg Exp ready for being executed.
 6270:  * The automata should be free after this point.
 6271:  *
 6272:  * Returns the compiled regexp or NULL in case of error
 6273:  */
 6274: xmlRegexpPtr          
 6275: xmlAutomataCompile(xmlAutomataPtr am) {
 6276:     xmlRegexpPtr ret;
 6277: 
 6278:     if ((am == NULL) || (am->error != 0)) return(NULL);
 6279:     xmlFAEliminateEpsilonTransitions(am);
 6280:     /* xmlFAComputesDeterminism(am); */
 6281:     ret = xmlRegEpxFromParse(am);
 6282: 
 6283:     return(ret);
 6284: }
 6285: 
 6286: /**
 6287:  * xmlAutomataIsDeterminist:
 6288:  * @am: an automata
 6289:  *
 6290:  * Checks if an automata is determinist.
 6291:  *
 6292:  * Returns 1 if true, 0 if not, and -1 in case of error
 6293:  */
 6294: int          
 6295: xmlAutomataIsDeterminist(xmlAutomataPtr am) {
 6296:     int ret;
 6297: 
 6298:     if (am == NULL)
 6299: 	return(-1);
 6300: 
 6301:     ret = xmlFAComputesDeterminism(am);
 6302:     return(ret);
 6303: }
 6304: #endif /* LIBXML_AUTOMATA_ENABLED */
 6305: 
 6306: #ifdef LIBXML_EXPR_ENABLED
 6307: /************************************************************************
 6308:  *									*
 6309:  *		Formal Expression handling code				*
 6310:  *									*
 6311:  ************************************************************************/
 6312: /************************************************************************
 6313:  *									*
 6314:  *		Expression handling context				*
 6315:  *									*
 6316:  ************************************************************************/
 6317: 
/*
 * Context shared by the formal expression routines; built by
 * xmlExpNewCtxt() and released by xmlExpFreeCtxt().
 */
struct _xmlExpCtxt {
    xmlDictPtr dict;		/* dictionary: created by xmlExpNewCtxt() or a
				   referenced caller-provided one */
    xmlExpNodePtr *table;	/* array of `size` node pointers, zeroed at
				   creation; presumably the hash buckets */
    int size;			/* number of slots in table (256 at creation) */
    int nbElems;		/* starts at 0; presumably the number of
				   entries stored in table - TODO confirm */
    int nb_nodes;		/* incremented for each node allocated by
				   xmlExpNewNode(), checked against MAX_NODES */
    int maxNodes;		/* node limit requested at creation, raised to
				   at least 4096 */
    const char *expr;		/* NOTE(review): not used in the visible code;
				   presumably the expression being parsed */
    const char *cur;		/* NOTE(review): presumably the parsing cursor
				   within expr - confirm */
    int nb_cons;		/* incremented for each node allocated by
				   xmlExpNewNode() */
    int tabSize;		/* NOTE(review): purpose not visible here */
};
 6330: 
 6331: /**
 6332:  * xmlExpNewCtxt:
 6333:  * @maxNodes:  the maximum number of nodes
 6334:  * @dict:  optional dictionnary to use internally
 6335:  *
 6336:  * Creates a new context for manipulating expressions
 6337:  *
 6338:  * Returns the context or NULL in case of error
 6339:  */
 6340: xmlExpCtxtPtr
 6341: xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
 6342:     xmlExpCtxtPtr ret;
 6343:     int size = 256;
 6344: 
 6345:     if (maxNodes <= 4096)
 6346:         maxNodes = 4096;
 6347:     
 6348:     ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
 6349:     if (ret == NULL)
 6350:         return(NULL);
 6351:     memset(ret, 0, sizeof(xmlExpCtxt));
 6352:     ret->size = size;
 6353:     ret->nbElems = 0;
 6354:     ret->maxNodes = maxNodes;
 6355:     ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
 6356:     if (ret->table == NULL) {
 6357:         xmlFree(ret);
 6358: 	return(NULL);
 6359:     }
 6360:     memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
 6361:     if (dict == NULL) {
 6362:         ret->dict = xmlDictCreate();
 6363: 	if (ret->dict == NULL) {
 6364: 	    xmlFree(ret->table);
 6365: 	    xmlFree(ret);
 6366: 	    return(NULL);
 6367: 	}
 6368:     } else {
 6369:         ret->dict = dict;
 6370: 	xmlDictReference(ret->dict);
 6371:     }
 6372:     return(ret);
 6373: }
 6374: 
 6375: /**
 6376:  * xmlExpFreeCtxt:
 6377:  * @ctxt:  an expression context
 6378:  *
 6379:  * Free an expression context
 6380:  */
 6381: void
 6382: xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
 6383:     if (ctxt == NULL)
 6384:         return;
 6385:     xmlDictFree(ctxt->dict);
 6386:     if (ctxt->table != NULL)
 6387: 	xmlFree(ctxt->table);
 6388:     xmlFree(ctxt);
 6389: }
 6390: 
 6391: /************************************************************************
 6392:  *									*
 6393:  *		Structure associated to an expression node		*
 6394:  *									*
 6395:  ************************************************************************/
/* Upper bound on ctxt->nb_nodes; xmlExpNewNode() refuses to allocate beyond it */
#define MAX_NODES 10000
 6397: 
 6398: /* #define DEBUG_DERIV */
 6399: 
 6400: /*
 6401:  * TODO: 
 6402:  * - Wildcards
 6403:  * - public API for creation
 6404:  *
 6405:  * Started
 6406:  * - regression testing
 6407:  *
 6408:  * Done
 6409:  * - split into module and test tool
 6410:  * - memleaks
 6411:  */
 6412: 
/*
 * Bit flags stored in the `info` field of an expression node.
 */
typedef enum {
    XML_EXP_NILABLE = (1 << 0)	/* set on the node of the empty expression */
} xmlExpNodeInfo;

/* Non-zero when the XML_EXP_NILABLE bit is set in the node info field */
#define IS_NILLABLE(node) ((node)->info & XML_EXP_NILABLE)
 6418: 
/*
 * A node of an expression. The payload held in the `field` union
 * presumably depends on the node type: bounds for counts, a right
 * operand for binary nodes, a string for atoms - TODO confirm against
 * the node constructors.
 */
struct _xmlExpNode {
    unsigned char type;/* xmlExpNodeType */
    unsigned char info;/* OR of xmlExpNodeInfo */
    unsigned short key;	/* the hash key */
    unsigned int ref;	/* The number of references */
    int c_max;		/* the maximum length it can consume */
    xmlExpNodePtr exp_left;	/* left (first) sub-expression */
    xmlExpNodePtr next;/* the next node in the hash table or free list */
    union {
	struct {
	    int f_min;	/* lower bound, accessed as exp_min */
	    int f_max;	/* upper bound, accessed as exp_max */
	} count;
	struct {
	    xmlExpNodePtr f_right;	/* right sub-expression (exp_right) */
	} children;
        const xmlChar *f_str;	/* string payload, accessed as exp_str */
    } field;
};

/* Shorthand accessors for the members of the `field` union */
#define exp_min field.count.f_min
#define exp_max field.count.f_max
/* #define exp_left field.children.f_left */
#define exp_right field.children.f_right
#define exp_str field.f_str
 6444: 
/* Forward declaration; the definition follows the hash key helpers */
static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
/* Statically allocated node of type XML_EXP_FORBID, with no info flag set */
static xmlExpNode forbiddenExpNode = {
    XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
};
/* Non-static pointer to the forbidden node above */
xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
/*
 * Statically allocated node of type XML_EXP_EMPTY; its info field is 1,
 * i.e. the XML_EXP_NILABLE bit
 */
static xmlExpNode emptyExpNode = {
    XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
};
/* Non-static pointer to the empty node above */
xmlExpNodePtr emptyExp = &emptyExpNode;
 6454: 
 6455: /************************************************************************
 6456:  *									*
 6457:  *  The custom hash table for unicity and canonicalization		*
 6458:  *  of sub-expressions pointers						*
 6459:  *									*
 6460:  ************************************************************************/
 6461: /*
 6462:  * xmlExpHashNameComputeKey:
 6463:  * Calculate the hash key for a token
 6464:  */
 6465: static unsigned short
 6466: xmlExpHashNameComputeKey(const xmlChar *name) {
 6467:     unsigned short value = 0L;
 6468:     char ch;
 6469:     
 6470:     if (name != NULL) {
 6471: 	value += 30 * (*name);
 6472: 	while ((ch = *name++) != 0) {
 6473: 	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
 6474: 	}
 6475:     }
 6476:     return (value);
 6477: }
 6478: 
 6479: /*
 6480:  * xmlExpHashComputeKey:
 6481:  * Calculate the hash key for a compound expression
 6482:  */
 6483: static unsigned short
 6484: xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
 6485:                      xmlExpNodePtr right) {
 6486:     unsigned long value;
 6487:     unsigned short ret;
 6488:     
 6489:     switch (type) {
 6490:         case XML_EXP_SEQ:
 6491: 	    value = left->key;
 6492: 	    value += right->key;
 6493: 	    value *= 3;
 6494: 	    ret = (unsigned short) value;
 6495: 	    break;
 6496:         case XML_EXP_OR:
 6497: 	    value = left->key;
 6498: 	    value += right->key;
 6499: 	    value *= 7;
 6500: 	    ret = (unsigned short) value;
 6501: 	    break;
 6502:         case XML_EXP_COUNT:
 6503: 	    value = left->key;
 6504: 	    value += right->key;
 6505: 	    ret = (unsigned short) value;
 6506: 	    break;
 6507: 	default:
 6508: 	    ret = 0;
 6509:     }
 6510:     return(ret);
 6511: }
 6512: 
 6513: 
 6514: static xmlExpNodePtr
 6515: xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
 6516:     xmlExpNodePtr ret;
 6517: 
 6518:     if (ctxt->nb_nodes >= MAX_NODES)
 6519:         return(NULL);
 6520:     ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
 6521:     if (ret == NULL)
 6522:         return(NULL);
 6523:     memset(ret, 0, sizeof(xmlExpNode));
 6524:     ret->type = type;
 6525:     ret->next = NULL;
 6526:     ctxt->nb_nodes++;
 6527:     ctxt->nb_cons++;
 6528:     return(ret);
 6529: }
 6530: 
 6531: /**
 6532:  * xmlExpHashGetEntry:
 6533:  * @table: the hash table
 6534:  *
 6535:  * Get the unique entry from the hash table. The entry is created if
 6536:  * needed. @left and @right are consumed, i.e. their ref count will
 6537:  * be decremented by the operation.
 6538:  *
 6539:  * Returns the pointer or NULL in case of error
 6540:  */
 6541: static xmlExpNodePtr
 6542: xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
 6543:                    xmlExpNodePtr left, xmlExpNodePtr right,
 6544: 		   const xmlChar *name, int min, int max) {
 6545:     unsigned short kbase, key;
 6546:     xmlExpNodePtr entry;
 6547:     xmlExpNodePtr insert;
 6548: 
 6549:     if (ctxt == NULL)
 6550: 	return(NULL);
 6551: 
 6552:     /*
 6553:      * Check for duplicate and insertion location.
 6554:      */
 6555:     if (type == XML_EXP_ATOM) {
 6556: 	kbase = xmlExpHashNameComputeKey(name);
 6557:     } else if (type == XML_EXP_COUNT) {
 6558:         /* COUNT reduction rule 1 */
 6559: 	/* a{1} -> a */
 6560: 	if (min == max) {
 6561: 	    if (min == 1) {
 6562: 		return(left);
 6563: 	    }
 6564: 	    if (min == 0) {
 6565: 		xmlExpFree(ctxt, left);
 6566: 	        return(emptyExp);
 6567: 	    }
 6568: 	}
 6569: 	if (min < 0) {
 6570: 	    xmlExpFree(ctxt, left);
 6571: 	    return(forbiddenExp);
 6572: 	}
 6573:         if (max == -1)
 6574: 	    kbase = min + 79;
 6575: 	else
 6576: 	    kbase = max - min;
 6577: 	kbase += left->key;
 6578:     } else if (type == XML_EXP_OR) {
 6579:         /* Forbid reduction rules */
 6580:         if (left->type == XML_EXP_FORBID) {
 6581: 	    xmlExpFree(ctxt, left);
 6582: 	    return(right);
 6583: 	}
 6584:         if (right->type == XML_EXP_FORBID) {
 6585: 	    xmlExpFree(ctxt, right);
 6586: 	    return(left);
 6587: 	}
 6588: 
 6589:         /* OR reduction rule 1 */
 6590: 	/* a | a reduced to a */
 6591:         if (left == right) {
 6592: 	    left->ref--;
 6593: 	    return(left);
 6594: 	}
 6595:         /* OR canonicalization rule 1 */
 6596: 	/* linearize (a | b) | c into a | (b | c) */
 6597:         if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
 6598: 	    xmlExpNodePtr tmp = left;
 6599:             left = right;
 6600: 	    right = tmp;
 6601: 	}
 6602:         /* OR reduction rule 2 */
 6603: 	/* a | (a | b) and b | (a | b) are reduced to a | b */
 6604:         if (right->type == XML_EXP_OR) {
 6605: 	    if ((left == right->exp_left) ||
 6606: 	        (left == right->exp_right)) {
 6607: 		xmlExpFree(ctxt, left);
 6608: 		return(right);
 6609: 	    }
 6610: 	}
 6611:         /* OR canonicalization rule 2 */
 6612: 	/* linearize (a | b) | c into a | (b | c) */
 6613:         if (left->type == XML_EXP_OR) {
 6614: 	    xmlExpNodePtr tmp;
 6615: 
 6616: 	    /* OR canonicalization rule 2 */
 6617: 	    if ((left->exp_right->type != XML_EXP_OR) &&
 6618: 	        (left->exp_right->key < left->exp_left->key)) {
 6619: 	        tmp = left->exp_right;
 6620: 		left->exp_right = left->exp_left;
 6621: 		left->exp_left = tmp;
 6622: 	    }
 6623: 	    left->exp_right->ref++;
 6624: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
 6625: 	                             NULL, 0, 0);
 6626: 	    left->exp_left->ref++;
 6627: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
 6628: 	                             NULL, 0, 0);
 6629: 	
 6630: 	    xmlExpFree(ctxt, left);
 6631: 	    return(tmp);
 6632: 	}
 6633: 	if (right->type == XML_EXP_OR) {
 6634: 	    /* Ordering in the tree */
 6635: 	    /* C | (A | B) -> A | (B | C) */
 6636: 	    if (left->key > right->exp_right->key) {
 6637: 		xmlExpNodePtr tmp;
 6638: 		right->exp_right->ref++;
 6639: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
 6640: 		                         left, NULL, 0, 0);
 6641: 		right->exp_left->ref++;
 6642: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6643: 		                         tmp, NULL, 0, 0);
 6644: 		xmlExpFree(ctxt, right);
 6645: 		return(tmp);
 6646: 	    }
 6647: 	    /* Ordering in the tree */
 6648: 	    /* B | (A | C) -> A | (B | C) */
 6649: 	    if (left->key > right->exp_left->key) {
 6650: 		xmlExpNodePtr tmp;
 6651: 		right->exp_right->ref++;
 6652: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
 6653: 		                         right->exp_right, NULL, 0, 0);
 6654: 		right->exp_left->ref++;
 6655: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6656: 		                         tmp, NULL, 0, 0);
 6657: 		xmlExpFree(ctxt, right);
 6658: 		return(tmp);
 6659: 	    }
 6660: 	}
 6661: 	/* we know both types are != XML_EXP_OR here */
 6662:         else if (left->key > right->key) {
 6663: 	    xmlExpNodePtr tmp = left;
 6664:             left = right;
 6665: 	    right = tmp;
 6666: 	}
 6667: 	kbase = xmlExpHashComputeKey(type, left, right);
 6668:     } else if (type == XML_EXP_SEQ) {
 6669:         /* Forbid reduction rules */
 6670:         if (left->type == XML_EXP_FORBID) {
 6671: 	    xmlExpFree(ctxt, right);
 6672: 	    return(left);
 6673: 	}
 6674:         if (right->type == XML_EXP_FORBID) {
 6675: 	    xmlExpFree(ctxt, left);
 6676: 	    return(right);
 6677: 	}
 6678:         /* Empty reduction rules */
 6679:         if (right->type == XML_EXP_EMPTY) {
 6680: 	    return(left);
 6681: 	}
 6682:         if (left->type == XML_EXP_EMPTY) {
 6683: 	    return(right);
 6684: 	}
 6685: 	kbase = xmlExpHashComputeKey(type, left, right);
 6686:     } else 
 6687:         return(NULL);
 6688: 
 6689:     key = kbase % ctxt->size;
 6690:     if (ctxt->table[key] != NULL) {
 6691: 	for (insert = ctxt->table[key]; insert != NULL;
 6692: 	     insert = insert->next) {
 6693: 	    if ((insert->key == kbase) &&
 6694: 	        (insert->type == type)) {
 6695: 		if (type == XML_EXP_ATOM) {
 6696: 		    if (name == insert->exp_str) {
 6697: 			insert->ref++;
 6698: 			return(insert);
 6699: 		    }
 6700: 		} else if (type == XML_EXP_COUNT) {
 6701: 		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
 6702: 		        (insert->exp_left == left)) {
 6703: 			insert->ref++;
 6704: 			left->ref--;
 6705: 			return(insert);
 6706: 		    }
 6707: 		} else if ((insert->exp_left == left) &&
 6708: 			   (insert->exp_right == right)) {
 6709: 		    insert->ref++;
 6710: 		    left->ref--;
 6711: 		    right->ref--;
 6712: 		    return(insert);
 6713: 		}
 6714: 	    }
 6715: 	}
 6716:     }
 6717: 
 6718:     entry = xmlExpNewNode(ctxt, type);
 6719:     if (entry == NULL)
 6720:         return(NULL);
 6721:     entry->key = kbase;
 6722:     if (type == XML_EXP_ATOM) {
 6723: 	entry->exp_str = name;
 6724: 	entry->c_max = 1;
 6725:     } else if (type == XML_EXP_COUNT) {
 6726:         entry->exp_min = min;
 6727:         entry->exp_max = max;
 6728: 	entry->exp_left = left;
 6729: 	if ((min == 0) || (IS_NILLABLE(left)))
 6730: 	    entry->info |= XML_EXP_NILABLE;
 6731: 	if (max < 0)
 6732: 	    entry->c_max = -1;
 6733: 	else
 6734: 	    entry->c_max = max * entry->exp_left->c_max;
 6735:     } else {
 6736: 	entry->exp_left = left;
 6737: 	entry->exp_right = right;
 6738: 	if (type == XML_EXP_OR) {
 6739: 	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
 6740: 		entry->info |= XML_EXP_NILABLE;
 6741: 	    if ((entry->exp_left->c_max == -1) ||
 6742: 	        (entry->exp_right->c_max == -1))
 6743: 		entry->c_max = -1;
 6744: 	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
 6745: 	        entry->c_max = entry->exp_left->c_max;
 6746: 	    else
 6747: 	        entry->c_max = entry->exp_right->c_max;
 6748: 	} else {
 6749: 	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
 6750: 		entry->info |= XML_EXP_NILABLE;
 6751: 	    if ((entry->exp_left->c_max == -1) ||
 6752: 	        (entry->exp_right->c_max == -1))
 6753: 		entry->c_max = -1;
 6754: 	    else
 6755: 	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
 6756: 	}
 6757:     }
 6758:     entry->ref = 1;
 6759:     if (ctxt->table[key] != NULL)
 6760:         entry->next = ctxt->table[key];
 6761: 
 6762:     ctxt->table[key] = entry;
 6763:     ctxt->nbElems++;
 6764: 
 6765:     return(entry);
 6766: }
 6767: 
 6768: /**
 6769:  * xmlExpFree:
 6770:  * @ctxt: the expression context
 6771:  * @exp: the expression
 6772:  *
 6773:  * Dereference the expression
 6774:  */
 6775: void
 6776: xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
 6777:     if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
 6778:         return;
 6779:     exp->ref--;
 6780:     if (exp->ref == 0) {
 6781:         unsigned short key;
 6782: 
 6783:         /* Unlink it first from the hash table */
 6784: 	key = exp->key % ctxt->size;
 6785: 	if (ctxt->table[key] == exp) {
 6786: 	    ctxt->table[key] = exp->next;
 6787: 	} else {
 6788: 	    xmlExpNodePtr tmp;
 6789: 
 6790: 	    tmp = ctxt->table[key];
 6791: 	    while (tmp != NULL) {
 6792: 	        if (tmp->next == exp) {
 6793: 		    tmp->next = exp->next;
 6794: 		    break;
 6795: 		}
 6796: 	        tmp = tmp->next;
 6797: 	    }
 6798: 	}
 6799: 
 6800:         if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
 6801: 	    xmlExpFree(ctxt, exp->exp_left);
 6802: 	    xmlExpFree(ctxt, exp->exp_right);
 6803: 	} else if (exp->type == XML_EXP_COUNT) {
 6804: 	    xmlExpFree(ctxt, exp->exp_left);
 6805: 	}
 6806:         xmlFree(exp);
 6807: 	ctxt->nb_nodes--;
 6808:     }
 6809: }
 6810: 
 6811: /**
 6812:  * xmlExpRef:
 6813:  * @exp: the expression
 6814:  *
 6815:  * Increase the reference count of the expression
 6816:  */
 6817: void
 6818: xmlExpRef(xmlExpNodePtr exp) {
 6819:     if (exp != NULL)
 6820:         exp->ref++;
 6821: }
 6822: 
 6823: /**
 6824:  * xmlExpNewAtom:
 6825:  * @ctxt: the expression context
 6826:  * @name: the atom name
 6827:  * @len: the atom name lenght in byte (or -1);
 6828:  *
 6829:  * Get the atom associated to this name from that context
 6830:  *
 6831:  * Returns the node or NULL in case of error
 6832:  */
 6833: xmlExpNodePtr
 6834: xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
 6835:     if ((ctxt == NULL) || (name == NULL))
 6836:         return(NULL);
 6837:     name = xmlDictLookup(ctxt->dict, name, len);
 6838:     if (name == NULL)
 6839:         return(NULL);
 6840:     return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
 6841: }
 6842: 
 6843: /**
 6844:  * xmlExpNewOr:
 6845:  * @ctxt: the expression context
 6846:  * @left: left expression
 6847:  * @right: right expression
 6848:  *
 6849:  * Get the atom associated to the choice @left | @right
 6850:  * Note that @left and @right are consumed in the operation, to keep
 6851:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6852:  * this is true even in case of failure (unless ctxt == NULL).
 6853:  *
 6854:  * Returns the node or NULL in case of error
 6855:  */
 6856: xmlExpNodePtr
 6857: xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6858:     if (ctxt == NULL)
 6859:         return(NULL);
 6860:     if ((left == NULL) || (right == NULL)) {
 6861:         xmlExpFree(ctxt, left);
 6862:         xmlExpFree(ctxt, right);
 6863:         return(NULL);
 6864:     }
 6865:     return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
 6866: }
 6867: 
 6868: /**
 6869:  * xmlExpNewSeq:
 6870:  * @ctxt: the expression context
 6871:  * @left: left expression
 6872:  * @right: right expression
 6873:  *
 6874:  * Get the atom associated to the sequence @left , @right
 6875:  * Note that @left and @right are consumed in the operation, to keep
 6876:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6877:  * this is true even in case of failure (unless ctxt == NULL).
 6878:  *
 6879:  * Returns the node or NULL in case of error
 6880:  */
 6881: xmlExpNodePtr
 6882: xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6883:     if (ctxt == NULL)
 6884:         return(NULL);
 6885:     if ((left == NULL) || (right == NULL)) {
 6886:         xmlExpFree(ctxt, left);
 6887:         xmlExpFree(ctxt, right);
 6888:         return(NULL);
 6889:     }
 6890:     return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
 6891: }
 6892: 
 6893: /**
 6894:  * xmlExpNewRange:
 6895:  * @ctxt: the expression context
 6896:  * @subset: the expression to be repeated
 6897:  * @min: the lower bound for the repetition
 6898:  * @max: the upper bound for the repetition, -1 means infinite
 6899:  *
 6900:  * Get the atom associated to the range (@subset){@min, @max}
 6901:  * Note that @subset is consumed in the operation, to keep
 6902:  * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
 6903:  * this is true even in case of failure (unless ctxt == NULL).
 6904:  *
 6905:  * Returns the node or NULL in case of error
 6906:  */
 6907: xmlExpNodePtr
 6908: xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
 6909:     if (ctxt == NULL)
 6910:         return(NULL);
 6911:     if ((subset == NULL) || (min < 0) || (max < -1) ||
 6912:         ((max >= 0) && (min > max))) {
 6913: 	xmlExpFree(ctxt, subset);
 6914:         return(NULL);
 6915:     }
 6916:     return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
 6917:                               NULL, NULL, min, max));
 6918: }
 6919: 
 6920: /************************************************************************
 6921:  *									*
 6922:  *		Public API for operations on expressions		*
 6923:  *									*
 6924:  ************************************************************************/
 6925: 
 6926: static int
 6927: xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6928:                      const xmlChar**list, int len, int nb) {
 6929:     int tmp, tmp2;
 6930: tail:
 6931:     switch (exp->type) {
 6932:         case XML_EXP_EMPTY:
 6933: 	    return(0);
 6934:         case XML_EXP_ATOM:
 6935: 	    for (tmp = 0;tmp < nb;tmp++)
 6936: 	        if (list[tmp] == exp->exp_str)
 6937: 		    return(0);
 6938:             if (nb >= len)
 6939: 	        return(-2);
 6940: 	    list[nb] = exp->exp_str;
 6941: 	    return(1);
 6942:         case XML_EXP_COUNT:
 6943: 	    exp = exp->exp_left;
 6944: 	    goto tail;
 6945:         case XML_EXP_SEQ:
 6946:         case XML_EXP_OR:
 6947: 	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
 6948: 	    if (tmp < 0)
 6949: 	        return(tmp);
 6950: 	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
 6951: 	                                nb + tmp);
 6952: 	    if (tmp2 < 0)
 6953: 	        return(tmp2);
 6954:             return(tmp + tmp2);
 6955:     }
 6956:     return(-1);
 6957: }
 6958: 
 6959: /**
 6960:  * xmlExpGetLanguage:
 6961:  * @ctxt: the expression context
 6962:  * @exp: the expression
 6963:  * @langList: where to store the tokens
 6964:  * @len: the allocated lenght of @list
 6965:  *
 6966:  * Find all the strings used in @exp and store them in @list
 6967:  *
 6968:  * Returns the number of unique strings found, -1 in case of errors and
 6969:  *         -2 if there is more than @len strings
 6970:  */
 6971: int
 6972: xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6973:                   const xmlChar**langList, int len) {
 6974:     if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
 6975:         return(-1);
 6976:     return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
 6977: }
 6978: 
 6979: static int
 6980: xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6981:                   const xmlChar**list, int len, int nb) {
 6982:     int tmp, tmp2;
 6983: tail:
 6984:     switch (exp->type) {
 6985:         case XML_EXP_FORBID:
 6986: 	    return(0);
 6987:         case XML_EXP_EMPTY:
 6988: 	    return(0);
 6989:         case XML_EXP_ATOM:
 6990: 	    for (tmp = 0;tmp < nb;tmp++)
 6991: 	        if (list[tmp] == exp->exp_str)
 6992: 		    return(0);
 6993:             if (nb >= len)
 6994: 	        return(-2);
 6995: 	    list[nb] = exp->exp_str;
 6996: 	    return(1);
 6997:         case XML_EXP_COUNT:
 6998: 	    exp = exp->exp_left;
 6999: 	    goto tail;
 7000:         case XML_EXP_SEQ:
 7001: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7002: 	    if (tmp < 0)
 7003: 	        return(tmp);
 7004: 	    if (IS_NILLABLE(exp->exp_left)) {
 7005: 		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7006: 					    nb + tmp);
 7007: 		if (tmp2 < 0)
 7008: 		    return(tmp2);
 7009: 		tmp += tmp2;
 7010: 	    }
 7011:             return(tmp);
 7012:         case XML_EXP_OR:
 7013: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7014: 	    if (tmp < 0)
 7015: 	        return(tmp);
 7016: 	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7017: 	                                nb + tmp);
 7018: 	    if (tmp2 < 0)
 7019: 	        return(tmp2);
 7020:             return(tmp + tmp2);
 7021:     }
 7022:     return(-1);
 7023: }
 7024: 
 7025: /**
 7026:  * xmlExpGetStart:
 7027:  * @ctxt: the expression context
 7028:  * @exp: the expression
 7029:  * @tokList: where to store the tokens
 7030:  * @len: the allocated lenght of @list
 7031:  *
 7032:  * Find all the strings that appears at the start of the languages
 7033:  * accepted by @exp and store them in @list. E.g. for (a, b) | c
 7034:  * it will return the list [a, c]
 7035:  *
 7036:  * Returns the number of unique strings found, -1 in case of errors and
 7037:  *         -2 if there is more than @len strings
 7038:  */
 7039: int
 7040: xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 7041:                const xmlChar**tokList, int len) {
 7042:     if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
 7043:         return(-1);
 7044:     return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
 7045: }
 7046: 
 7047: /**
 7048:  * xmlExpIsNillable:
 7049:  * @exp: the expression
 7050:  *
 7051:  * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
 7052:  *
 7053:  * Returns 1 if nillable, 0 if not and -1 in case of error
 7054:  */
 7055: int
 7056: xmlExpIsNillable(xmlExpNodePtr exp) {
 7057:     if (exp == NULL)
 7058:         return(-1);
 7059:     return(IS_NILLABLE(exp) != 0);
 7060: }
 7061: 
 7062: static xmlExpNodePtr
 7063: xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
 7064: {
 7065:     xmlExpNodePtr ret;
 7066: 
 7067:     switch (exp->type) {
 7068: 	case XML_EXP_EMPTY:
 7069: 	    return(forbiddenExp);
 7070: 	case XML_EXP_FORBID:
 7071: 	    return(forbiddenExp);
 7072: 	case XML_EXP_ATOM:
 7073: 	    if (exp->exp_str == str) {
 7074: #ifdef DEBUG_DERIV
 7075: 		printf("deriv atom: equal => Empty\n");
 7076: #endif
 7077: 	        ret = emptyExp;
 7078: 	    } else {
 7079: #ifdef DEBUG_DERIV
 7080: 		printf("deriv atom: mismatch => forbid\n");
 7081: #endif
 7082: 	        /* TODO wildcards here */
 7083: 		ret = forbiddenExp;
 7084: 	    }
 7085: 	    return(ret);
 7086: 	case XML_EXP_OR: {
 7087: 	    xmlExpNodePtr tmp;
 7088: 
 7089: #ifdef DEBUG_DERIV
 7090: 	    printf("deriv or: => or(derivs)\n");
 7091: #endif
 7092: 	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7093: 	    if (tmp == NULL) {
 7094: 		return(NULL);
 7095: 	    }
 7096: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7097: 	    if (ret == NULL) {
 7098: 	        xmlExpFree(ctxt, tmp);
 7099: 		return(NULL);
 7100: 	    }
 7101:             ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
 7102: 			     NULL, 0, 0);
 7103: 	    return(ret);
 7104: 	}
 7105: 	case XML_EXP_SEQ:
 7106: #ifdef DEBUG_DERIV
 7107: 	    printf("deriv seq: starting with left\n");
 7108: #endif
 7109: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7110: 	    if (ret == NULL) {
 7111: 	        return(NULL);
 7112: 	    } else if (ret == forbiddenExp) {
 7113: 	        if (IS_NILLABLE(exp->exp_left)) {
 7114: #ifdef DEBUG_DERIV
 7115: 		    printf("deriv seq: left failed but nillable\n");
 7116: #endif
 7117: 		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7118: 		}
 7119: 	    } else {
 7120: #ifdef DEBUG_DERIV
 7121: 		printf("deriv seq: left match => sequence\n");
 7122: #endif
 7123: 	        exp->exp_right->ref++;
 7124: 	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
 7125: 		                         NULL, 0, 0);
 7126: 	    }
 7127: 	    return(ret);
 7128: 	case XML_EXP_COUNT: {
 7129: 	    int min, max;
 7130: 	    xmlExpNodePtr tmp;
 7131: 
 7132: 	    if (exp->exp_max == 0)
 7133: 		return(forbiddenExp);
 7134: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7135: 	    if (ret == NULL)
 7136: 	        return(NULL);
 7137: 	    if (ret == forbiddenExp) {
 7138: #ifdef DEBUG_DERIV
 7139: 		printf("deriv count: pattern mismatch => forbid\n");
 7140: #endif
 7141: 	        return(ret);
 7142: 	    }
 7143: 	    if (exp->exp_max == 1)
 7144: 		return(ret);
 7145: 	    if (exp->exp_max < 0) /* unbounded */
 7146: 		max = -1;
 7147: 	    else
 7148: 		max = exp->exp_max - 1;
 7149: 	    if (exp->exp_min > 0)
 7150: 		min = exp->exp_min - 1;
 7151: 	    else
 7152: 		min = 0;
 7153: 	    exp->exp_left->ref++;
 7154: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
 7155: 				     NULL, min, max);
 7156: 	    if (ret == emptyExp) {
 7157: #ifdef DEBUG_DERIV
 7158: 		printf("deriv count: match to empty => new count\n");
 7159: #endif
 7160: 	        return(tmp);
 7161: 	    }
 7162: #ifdef DEBUG_DERIV
 7163: 	    printf("deriv count: match => sequence with new count\n");
 7164: #endif
 7165: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
 7166: 	                              NULL, 0, 0));
 7167: 	}
 7168:     }
 7169:     return(NULL);
 7170: }
 7171: 
 7172: /**
 7173:  * xmlExpStringDerive:
 7174:  * @ctxt: the expression context
 7175:  * @exp: the expression
 7176:  * @str: the string
 7177:  * @len: the string len in bytes if available
 7178:  *
 7179:  * Do one step of Brzozowski derivation of the expression @exp with
 7180:  * respect to the input string
 7181:  *
 7182:  * Returns the resulting expression or NULL in case of internal error
 7183:  */
 7184: xmlExpNodePtr
 7185: xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7186:                    const xmlChar *str, int len) {
 7187:     const xmlChar *input;
 7188: 
 7189:     if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
 7190:         return(NULL);
 7191:     }
 7192:     /*
 7193:      * check the string is in the dictionnary, if yes use an interned
 7194:      * copy, otherwise we know it's not an acceptable input
 7195:      */
 7196:     input = xmlDictExists(ctxt->dict, str, len);
 7197:     if (input == NULL) {
 7198:         return(forbiddenExp);
 7199:     }
 7200:     return(xmlExpStringDeriveInt(ctxt, exp, input));
 7201: }
 7202: 
 7203: static int
 7204: xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7205:     int ret = 1;
 7206: 
 7207:     if (sub->c_max == -1) {
 7208:         if (exp->c_max != -1)
 7209: 	    ret = 0;
 7210:     } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
 7211:         ret = 0;
 7212:     }
 7213: #if 0
 7214:     if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
 7215:         ret = 0;
 7216: #endif
 7217:     return(ret);
 7218: }
 7219: 
 7220: static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7221:                                         xmlExpNodePtr sub);
 7222: /**
 7223:  * xmlExpDivide:
 7224:  * @ctxt: the expressions context
 7225:  * @exp: the englobing expression
 7226:  * @sub: the subexpression
 7227:  * @mult: the multiple expression
 7228:  * @remain: the remain from the derivation of the multiple
 7229:  *
 7230:  * Check if exp is a multiple of sub, i.e. if there is a finite number n
 7231:  * so that sub{n} subsume exp
 7232:  *
 7233:  * Returns the multiple value if successful, 0 if it is not a multiple
 7234:  *         and -1 in case of internel error.
 7235:  */
 7236: 
 7237: static int
 7238: xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
 7239:              xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
 7240:     int i;
 7241:     xmlExpNodePtr tmp, tmp2;
 7242: 
 7243:     if (mult != NULL) *mult = NULL;
 7244:     if (remain != NULL) *remain = NULL;
 7245:     if (exp->c_max == -1) return(0);
 7246:     if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
 7247: 
 7248:     for (i = 1;i <= exp->c_max;i++) {
 7249:         sub->ref++;
 7250:         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7251: 				 sub, NULL, NULL, i, i);
 7252: 	if (tmp == NULL) {
 7253: 	    return(-1);
 7254: 	}
 7255: 	if (!xmlExpCheckCard(tmp, exp)) {
 7256: 	    xmlExpFree(ctxt, tmp);
 7257: 	    continue;
 7258: 	}
 7259: 	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
 7260: 	if (tmp2 == NULL) {
 7261: 	    xmlExpFree(ctxt, tmp);
 7262: 	    return(-1);
 7263: 	}
 7264: 	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
 7265: 	    if (remain != NULL)
 7266: 	        *remain = tmp2;
 7267: 	    else
 7268: 	        xmlExpFree(ctxt, tmp2);
 7269: 	    if (mult != NULL)
 7270: 	        *mult = tmp;
 7271: 	    else
 7272: 	        xmlExpFree(ctxt, tmp);
 7273: #ifdef DEBUG_DERIV
 7274: 	    printf("Divide succeeded %d\n", i);
 7275: #endif
 7276: 	    return(i);
 7277: 	}
 7278: 	xmlExpFree(ctxt, tmp);
 7279: 	xmlExpFree(ctxt, tmp2);
 7280:     }
 7281: #ifdef DEBUG_DERIV
 7282:     printf("Divide failed\n");
 7283: #endif
 7284:     return(0);
 7285: }
 7286: 
 7287: /**
 7288:  * xmlExpExpDeriveInt:
 7289:  * @ctxt: the expressions context
 7290:  * @exp: the englobing expression
 7291:  * @sub: the subexpression
 7292:  *
 7293:  * Try to do a step of Brzozowski derivation but at a higher level
 7294:  * the input being a subexpression.
 7295:  *
 7296:  * Returns the resulting expression or NULL in case of internal error
 7297:  */
 7298: static xmlExpNodePtr
 7299: xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7300:     xmlExpNodePtr ret, tmp, tmp2, tmp3;
 7301:     const xmlChar **tab;
 7302:     int len, i;
 7303: 
 7304:     /*
 7305:      * In case of equality and if the expression can only consume a finite
 7306:      * amount, then the derivation is empty
 7307:      */
 7308:     if ((exp == sub) && (exp->c_max >= 0)) {
 7309: #ifdef DEBUG_DERIV
 7310:         printf("Equal(exp, sub) and finite -> Empty\n");
 7311: #endif
 7312:         return(emptyExp);
 7313:     }
 7314:     /*
 7315:      * decompose sub sequence first
 7316:      */
 7317:     if (sub->type == XML_EXP_EMPTY) {
 7318: #ifdef DEBUG_DERIV
 7319:         printf("Empty(sub) -> Empty\n");
 7320: #endif
 7321: 	exp->ref++;
 7322:         return(exp);
 7323:     }
 7324:     if (sub->type == XML_EXP_SEQ) {
 7325: #ifdef DEBUG_DERIV
 7326:         printf("Seq(sub) -> decompose\n");
 7327: #endif
 7328:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7329: 	if (tmp == NULL)
 7330: 	    return(NULL);
 7331: 	if (tmp == forbiddenExp)
 7332: 	    return(tmp);
 7333: 	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
 7334: 	xmlExpFree(ctxt, tmp);
 7335: 	return(ret);
 7336:     }
 7337:     if (sub->type == XML_EXP_OR) {
 7338: #ifdef DEBUG_DERIV
 7339:         printf("Or(sub) -> decompose\n");
 7340: #endif
 7341:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7342: 	if (tmp == forbiddenExp)
 7343: 	    return(tmp);
 7344: 	if (tmp == NULL)
 7345: 	    return(NULL);
 7346: 	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
 7347: 	if ((ret == NULL) || (ret == forbiddenExp)) {
 7348: 	    xmlExpFree(ctxt, tmp);
 7349: 	    return(ret);
 7350: 	}
 7351: 	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
 7352:     }
 7353:     if (!xmlExpCheckCard(exp, sub)) {
 7354: #ifdef DEBUG_DERIV
 7355:         printf("CheckCard(exp, sub) failed -> Forbid\n");
 7356: #endif
 7357:         return(forbiddenExp);
 7358:     }
 7359:     switch (exp->type) {
 7360:         case XML_EXP_EMPTY:
 7361: 	    if (sub == emptyExp)
 7362: 	        return(emptyExp);
 7363: #ifdef DEBUG_DERIV
 7364: 	    printf("Empty(exp) -> Forbid\n");
 7365: #endif
 7366: 	    return(forbiddenExp);
 7367:         case XML_EXP_FORBID:
 7368: #ifdef DEBUG_DERIV
 7369: 	    printf("Forbid(exp) -> Forbid\n");
 7370: #endif
 7371: 	    return(forbiddenExp);
 7372:         case XML_EXP_ATOM:
 7373: 	    if (sub->type == XML_EXP_ATOM) {
 7374: 	        /* TODO: handle wildcards */
 7375: 	        if (exp->exp_str == sub->exp_str) {
 7376: #ifdef DEBUG_DERIV
 7377: 		    printf("Atom match -> Empty\n");
 7378: #endif
 7379: 		    return(emptyExp);
 7380:                 }
 7381: #ifdef DEBUG_DERIV
 7382: 		printf("Atom mismatch -> Forbid\n");
 7383: #endif
 7384: 	        return(forbiddenExp);
 7385: 	    }
 7386: 	    if ((sub->type == XML_EXP_COUNT) &&
 7387: 	        (sub->exp_max == 1) &&
 7388: 	        (sub->exp_left->type == XML_EXP_ATOM)) {
 7389: 	        /* TODO: handle wildcards */
 7390: 	        if (exp->exp_str == sub->exp_left->exp_str) {
 7391: #ifdef DEBUG_DERIV
 7392: 		    printf("Atom match -> Empty\n");
 7393: #endif
 7394: 		    return(emptyExp);
 7395: 		}
 7396: #ifdef DEBUG_DERIV
 7397: 		printf("Atom mismatch -> Forbid\n");
 7398: #endif
 7399: 	        return(forbiddenExp);
 7400: 	    }
 7401: #ifdef DEBUG_DERIV
 7402: 	    printf("Compex exp vs Atom -> Forbid\n");
 7403: #endif
 7404: 	    return(forbiddenExp);
 7405:         case XML_EXP_SEQ:
 7406: 	    /* try to get the sequence consumed only if possible */
 7407: 	    if (xmlExpCheckCard(exp->exp_left, sub)) {
 7408: 		/* See if the sequence can be consumed directly */
 7409: #ifdef DEBUG_DERIV
 7410: 		printf("Seq trying left only\n");
 7411: #endif
 7412: 		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7413: 		if ((ret != forbiddenExp) && (ret != NULL)) {
 7414: #ifdef DEBUG_DERIV
 7415: 		    printf("Seq trying left only worked\n");
 7416: #endif
 7417: 		    /*
 7418: 		     * TODO: assumption here that we are determinist
 7419: 		     *       i.e. we won't get to a nillable exp left
 7420: 		     *       subset which could be matched by the right
 7421: 		     *       part too.
 7422: 		     * e.g.: (a | b)+,(a | c) and 'a+,a'
 7423: 		     */
 7424: 		    exp->exp_right->ref++;
 7425: 		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7426: 					      exp->exp_right, NULL, 0, 0));
 7427: 		}
 7428: #ifdef DEBUG_DERIV
 7429: 	    } else {
 7430: 		printf("Seq: left too short\n");
 7431: #endif
 7432: 	    }
 7433: 	    /* Try instead to decompose */
 7434: 	    if (sub->type == XML_EXP_COUNT) {
 7435: 		int min, max;
 7436: 
 7437: #ifdef DEBUG_DERIV
 7438: 		printf("Seq: sub is a count\n");
 7439: #endif
 7440: 	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7441: 		if (ret == NULL)
 7442: 		    return(NULL);
 7443: 		if (ret != forbiddenExp) {
 7444: #ifdef DEBUG_DERIV
 7445: 		    printf("Seq , Count match on left\n");
 7446: #endif
 7447: 		    if (sub->exp_max < 0)
 7448: 		        max = -1;
 7449: 	            else
 7450: 		        max = sub->exp_max -1;
 7451: 		    if (sub->exp_min > 0)
 7452: 		        min = sub->exp_min -1;
 7453: 		    else
 7454: 		        min = 0;
 7455: 		    exp->exp_right->ref++;
 7456: 		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7457: 		                             exp->exp_right, NULL, 0, 0);
 7458: 		    if (tmp == NULL)
 7459: 		        return(NULL);
 7460: 
 7461: 		    sub->exp_left->ref++;
 7462: 		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7463: 				      sub->exp_left, NULL, NULL, min, max);
 7464: 		    if (tmp2 == NULL) {
 7465: 		        xmlExpFree(ctxt, tmp);
 7466: 			return(NULL);
 7467: 		    }
 7468: 		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7469: 		    xmlExpFree(ctxt, tmp);
 7470: 		    xmlExpFree(ctxt, tmp2);
 7471: 		    return(ret);
 7472: 		}
 7473: 	    }
 7474: 	    /* we made no progress on structured operations */
 7475: 	    break;
 7476:         case XML_EXP_OR:
 7477: #ifdef DEBUG_DERIV
 7478: 	    printf("Or , trying both side\n");
 7479: #endif
 7480: 	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7481: 	    if (ret == NULL)
 7482: 	        return(NULL);
 7483: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
 7484: 	    if (tmp == NULL) {
 7485: 		xmlExpFree(ctxt, ret);
 7486: 	        return(NULL);
 7487: 	    }
 7488: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
 7489:         case XML_EXP_COUNT: {
 7490: 	    int min, max;
 7491: 
 7492: 	    if (sub->type == XML_EXP_COUNT) {
 7493: 	        /*
 7494: 		 * Try to see if the loop is completely subsumed
 7495: 		 */
 7496: 	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7497: 		if (tmp == NULL)
 7498: 		    return(NULL);
 7499: 		if (tmp == forbiddenExp) {
 7500: 		    int mult;
 7501: 
 7502: #ifdef DEBUG_DERIV
 7503: 		    printf("Count, Count inner don't subsume\n");
 7504: #endif
 7505: 		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
 7506: 		                        NULL, &tmp);
 7507: 		    if (mult <= 0) {
 7508: #ifdef DEBUG_DERIV
 7509: 			printf("Count, Count not multiple => forbidden\n");
 7510: #endif
 7511:                         return(forbiddenExp);
 7512: 		    }
 7513: 		    if (sub->exp_max == -1) {
 7514: 		        max = -1;
 7515: 			if (exp->exp_max == -1) {
 7516: 			    if (exp->exp_min <= sub->exp_min * mult)
 7517: 			        min = 0;
 7518: 			    else
 7519: 			        min = exp->exp_min - sub->exp_min * mult;
 7520: 			} else {
 7521: #ifdef DEBUG_DERIV
 7522: 			    printf("Count, Count finite can't subsume infinite\n");
 7523: #endif
 7524:                             xmlExpFree(ctxt, tmp);
 7525: 			    return(forbiddenExp);
 7526: 			}
 7527: 		    } else {
 7528: 			if (exp->exp_max == -1) {
 7529: #ifdef DEBUG_DERIV
 7530: 			    printf("Infinite loop consume mult finite loop\n");
 7531: #endif
 7532: 			    if (exp->exp_min > sub->exp_min * mult) {
 7533: 				max = -1;
 7534: 				min = exp->exp_min - sub->exp_min * mult;
 7535: 			    } else {
 7536: 				max = -1;
 7537: 				min = 0;
 7538: 			    }
 7539: 			} else {
 7540: 			    if (exp->exp_max < sub->exp_max * mult) {
 7541: #ifdef DEBUG_DERIV
 7542: 				printf("loops max mult mismatch => forbidden\n");
 7543: #endif
 7544: 				xmlExpFree(ctxt, tmp);
 7545: 				return(forbiddenExp);
 7546: 			    }
 7547: 			    if (sub->exp_max * mult > exp->exp_min)
 7548: 				min = 0;
 7549: 			    else
 7550: 				min = exp->exp_min - sub->exp_max * mult;
 7551: 			    max = exp->exp_max - sub->exp_max * mult;
 7552: 			}
 7553: 		    }
 7554: 		} else if (!IS_NILLABLE(tmp)) {
 7555: 		    /*
 7556: 		     * TODO: loop here to try to grow if working on finite
 7557: 		     *       blocks.
 7558: 		     */
 7559: #ifdef DEBUG_DERIV
 7560: 		    printf("Count, Count remain not nillable => forbidden\n");
 7561: #endif
 7562: 		    xmlExpFree(ctxt, tmp);
 7563: 		    return(forbiddenExp);
 7564: 		} else if (sub->exp_max == -1) {
 7565: 		    if (exp->exp_max == -1) {
 7566: 		        if (exp->exp_min <= sub->exp_min) {
 7567: #ifdef DEBUG_DERIV
 7568: 			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
 7569: #endif
 7570:                             max = -1;
 7571: 			    min = 0;
 7572: 			} else {
 7573: #ifdef DEBUG_DERIV
 7574: 			    printf("Infinite loops min => Count(X,Inf)\n");
 7575: #endif
 7576:                             max = -1;
 7577: 			    min = exp->exp_min - sub->exp_min;
 7578: 			}
 7579: 		    } else if (exp->exp_min > sub->exp_min) {
 7580: #ifdef DEBUG_DERIV
 7581: 			printf("loops min mismatch 1 => forbidden ???\n");
 7582: #endif
 7583: 		        xmlExpFree(ctxt, tmp);
 7584: 		        return(forbiddenExp);
 7585: 		    } else {
 7586: 			max = -1;
 7587: 			min = 0;
 7588: 		    }
 7589: 		} else {
 7590: 		    if (exp->exp_max == -1) {
 7591: #ifdef DEBUG_DERIV
 7592: 			printf("Infinite loop consume finite loop\n");
 7593: #endif
 7594: 		        if (exp->exp_min > sub->exp_min) {
 7595: 			    max = -1;
 7596: 			    min = exp->exp_min - sub->exp_min;
 7597: 			} else {
 7598: 			    max = -1;
 7599: 			    min = 0;
 7600: 			}
 7601: 		    } else {
 7602: 		        if (exp->exp_max < sub->exp_max) {
 7603: #ifdef DEBUG_DERIV
 7604: 			    printf("loops max mismatch => forbidden\n");
 7605: #endif
 7606: 			    xmlExpFree(ctxt, tmp);
 7607: 			    return(forbiddenExp);
 7608: 			}
 7609: 			if (sub->exp_max > exp->exp_min)
 7610: 			    min = 0;
 7611: 			else
 7612: 			    min = exp->exp_min - sub->exp_max;
 7613: 			max = exp->exp_max - sub->exp_max;
 7614: 		    }
 7615: 		}
 7616: #ifdef DEBUG_DERIV
 7617: 		printf("loops match => SEQ(COUNT())\n");
 7618: #endif
 7619: 		exp->exp_left->ref++;
 7620: 		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7621: 		                          NULL, NULL, min, max);
 7622: 		if (tmp2 == NULL) {
 7623: 		    return(NULL);
 7624: 		}
 7625:                 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7626: 		                         NULL, 0, 0);
 7627: 		return(ret);
 7628: 	    }
 7629: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7630: 	    if (tmp == NULL)
 7631: 		return(NULL);
 7632: 	    if (tmp == forbiddenExp) {
 7633: #ifdef DEBUG_DERIV
 7634: 		printf("loop mismatch => forbidden\n");
 7635: #endif
 7636: 		return(forbiddenExp);
 7637: 	    }
 7638: 	    if (exp->exp_min > 0)
 7639: 		min = exp->exp_min - 1;
 7640: 	    else
 7641: 		min = 0;
 7642: 	    if (exp->exp_max < 0)
 7643: 		max = -1;
 7644: 	    else
 7645: 		max = exp->exp_max - 1;
 7646: 
 7647: #ifdef DEBUG_DERIV
 7648: 	    printf("loop match => SEQ(COUNT())\n");
 7649: #endif
 7650: 	    exp->exp_left->ref++;
 7651: 	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7652: 				      NULL, NULL, min, max);
 7653: 	    if (tmp2 == NULL)
 7654: 		return(NULL);
 7655: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7656: 				     NULL, 0, 0);
 7657: 	    return(ret);
 7658: 	}
 7659:     }
 7660: 
 7661: #ifdef DEBUG_DERIV
 7662:     printf("Fallback to derivative\n");
 7663: #endif
 7664:     if (IS_NILLABLE(sub)) {
 7665:         if (!(IS_NILLABLE(exp)))
 7666: 	    return(forbiddenExp);
 7667: 	else
 7668: 	    ret = emptyExp;
 7669:     } else
 7670: 	ret = NULL;
 7671:     /*
 7672:      * here the structured derivation made no progress so
 7673:      * we use the default token based derivation to force one more step
 7674:      */
 7675:     if (ctxt->tabSize == 0)
 7676:         ctxt->tabSize = 40;
 7677: 
 7678:     tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
 7679: 	                               sizeof(const xmlChar *));
 7680:     if (tab == NULL) {
 7681: 	return(NULL);
 7682:     }
 7683: 
 7684:     /*
 7685:      * collect all the strings accepted by the subexpression on input
 7686:      */
 7687:     len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7688:     while (len < 0) {
 7689:         const xmlChar **temp;
 7690: 	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
 7691: 	                                     sizeof(const xmlChar *));
 7692: 	if (temp == NULL) {
 7693: 	    xmlFree((xmlChar **) tab);
 7694: 	    return(NULL);
 7695: 	}
 7696: 	tab = temp;
 7697: 	ctxt->tabSize *= 2;
 7698: 	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7699:     }
 7700:     for (i = 0;i < len;i++) {
 7701:         tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
 7702: 	if ((tmp == NULL) || (tmp == forbiddenExp)) {
 7703: 	    xmlExpFree(ctxt, ret);
 7704: 	    xmlFree((xmlChar **) tab);
 7705: 	    return(tmp);
 7706: 	}
 7707: 	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
 7708: 	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
 7709: 	    xmlExpFree(ctxt, tmp);
 7710: 	    xmlExpFree(ctxt, ret);
 7711: 	    xmlFree((xmlChar **) tab);
 7712: 	    return(tmp);
 7713: 	}
 7714: 	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7715: 	xmlExpFree(ctxt, tmp);
 7716: 	xmlExpFree(ctxt, tmp2);
 7717: 
 7718: 	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
 7719: 	    xmlExpFree(ctxt, ret);
 7720: 	    xmlFree((xmlChar **) tab);
 7721: 	    return(tmp3);
 7722: 	}
 7723: 
 7724: 	if (ret == NULL)
 7725: 	    ret = tmp3;
 7726: 	else {
 7727: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
 7728: 	    if (ret == NULL) {
 7729: 		xmlFree((xmlChar **) tab);
 7730: 	        return(NULL);
 7731: 	    }
 7732: 	}
 7733:     }
 7734:     xmlFree((xmlChar **) tab);
 7735:     return(ret);
 7736: }
 7737:     
 7738: /**
 7739:  * xmlExpExpDerive:
 7740:  * @ctxt: the expressions context
 7741:  * @exp: the englobing expression
 7742:  * @sub: the subexpression
 7743:  *
 7744:  * Evaluates the expression resulting from @exp consuming a sub expression @sub
 7745:  * Based on algebraic derivation and sometimes direct Brzozowski derivation
 7746:  * it usually tatkes less than linear time and can handle expressions generating
 7747:  * infinite languages.
 7748:  *
 7749:  * Returns the resulting expression or NULL in case of internal error, the
 7750:  *         result must be freed
 7751:  */
 7752: xmlExpNodePtr
 7753: xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7754:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7755:         return(NULL);
 7756: 
 7757:     /*
 7758:      * O(1) speedups
 7759:      */
 7760:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7761: #ifdef DEBUG_DERIV
 7762: 	printf("Sub nillable and not exp : can't subsume\n");
 7763: #endif
 7764:         return(forbiddenExp);
 7765:     }
 7766:     if (xmlExpCheckCard(exp, sub) == 0) {
 7767: #ifdef DEBUG_DERIV
 7768: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7769: #endif
 7770:         return(forbiddenExp);
 7771:     }
 7772:     return(xmlExpExpDeriveInt(ctxt, exp, sub));
 7773: }
 7774: 
 7775: /**
 7776:  * xmlExpSubsume:
 7777:  * @ctxt: the expressions context
 7778:  * @exp: the englobing expression
 7779:  * @sub: the subexpression
 7780:  *
 7781:  * Check whether @exp accepts all the languages accexpted by @sub
 7782:  * the input being a subexpression.
 7783:  *
 7784:  * Returns 1 if true 0 if false and -1 in case of failure.
 7785:  */
 7786: int
 7787: xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7788:     xmlExpNodePtr tmp;
 7789:     
 7790:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7791:         return(-1);
 7792: 
 7793:     /*
 7794:      * TODO: speedup by checking the language of sub is a subset of the
 7795:      *       language of exp
 7796:      */
 7797:     /*
 7798:      * O(1) speedups
 7799:      */
 7800:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7801: #ifdef DEBUG_DERIV
 7802: 	printf("Sub nillable and not exp : can't subsume\n");
 7803: #endif
 7804:         return(0);
 7805:     }
 7806:     if (xmlExpCheckCard(exp, sub) == 0) {
 7807: #ifdef DEBUG_DERIV
 7808: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7809: #endif
 7810:         return(0);
 7811:     }
 7812:     tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
 7813: #ifdef DEBUG_DERIV
 7814:     printf("Result derivation :\n");
 7815:     PRINT_EXP(tmp);
 7816: #endif
 7817:     if (tmp == NULL)
 7818:         return(-1);
 7819:     if (tmp == forbiddenExp)
 7820: 	return(0);
 7821:     if (tmp == emptyExp)
 7822: 	return(1);
 7823:     if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
 7824:         xmlExpFree(ctxt, tmp);
 7825:         return(1);
 7826:     }
 7827:     xmlExpFree(ctxt, tmp);
 7828:     return(0);
 7829: }
 7830: 
 7831: /************************************************************************
 7832:  *									*
 7833:  *			Parsing expression 				*
 7834:  *									*
 7835:  ************************************************************************/
 7836: 
 7837: static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);
 7838: 
 7839: #undef CUR
 7840: #define CUR (*ctxt->cur)
 7841: #undef NEXT
 7842: #define NEXT ctxt->cur++;
 7843: #undef IS_BLANK
 7844: #define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
 7845: #define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
 7846: 
 7847: static int
 7848: xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
 7849:     int ret = 0;
 7850: 
 7851:     SKIP_BLANKS
 7852:     if (CUR == '*') {
 7853: 	NEXT
 7854: 	return(-1);
 7855:     }
 7856:     if ((CUR < '0') || (CUR > '9'))
 7857:         return(-1);
 7858:     while ((CUR >= '0') && (CUR <= '9')) {
 7859:         ret = ret * 10 + (CUR - '0');
 7860: 	NEXT
 7861:     }
 7862:     return(ret);
 7863: }
 7864: 
 7865: static xmlExpNodePtr
 7866: xmlExpParseOr(xmlExpCtxtPtr ctxt) {
 7867:     const char *base;
 7868:     xmlExpNodePtr ret;
 7869:     const xmlChar *val;
 7870: 
 7871:     SKIP_BLANKS
 7872:     base = ctxt->cur;
 7873:     if (*ctxt->cur == '(') {
 7874:         NEXT
 7875: 	ret = xmlExpParseExpr(ctxt);
 7876: 	SKIP_BLANKS
 7877: 	if (*ctxt->cur != ')') {
 7878: 	    fprintf(stderr, "unbalanced '(' : %s\n", base);
 7879: 	    xmlExpFree(ctxt, ret);
 7880: 	    return(NULL);
 7881: 	}
 7882: 	NEXT;
 7883: 	SKIP_BLANKS
 7884: 	goto parse_quantifier;
 7885:     }
 7886:     while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
 7887:            (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
 7888: 	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
 7889: 	NEXT;
 7890:     val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
 7891:     if (val == NULL)
 7892:         return(NULL);
 7893:     ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
 7894:     if (ret == NULL)
 7895:         return(NULL);
 7896:     SKIP_BLANKS
 7897: parse_quantifier:
 7898:     if (CUR == '{') {
 7899:         int min, max;
 7900: 
 7901:         NEXT
 7902: 	min = xmlExpParseNumber(ctxt);
 7903: 	if (min < 0) {
 7904: 	    xmlExpFree(ctxt, ret);
 7905: 	    return(NULL);
 7906: 	}
 7907: 	SKIP_BLANKS
 7908: 	if (CUR == ',') {
 7909: 	    NEXT
 7910: 	    max = xmlExpParseNumber(ctxt);
 7911: 	    SKIP_BLANKS
 7912: 	} else
 7913: 	    max = min;
 7914: 	if (CUR != '}') {
 7915: 	    xmlExpFree(ctxt, ret);
 7916: 	    return(NULL);
 7917: 	}
 7918:         NEXT
 7919: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7920: 	                         min, max);
 7921: 	SKIP_BLANKS
 7922:     } else if (CUR == '?') {
 7923:         NEXT
 7924: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7925: 	                         0, 1);
 7926: 	SKIP_BLANKS
 7927:     } else if (CUR == '+') {
 7928:         NEXT
 7929: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7930: 	                         1, -1);
 7931: 	SKIP_BLANKS
 7932:     } else if (CUR == '*') {
 7933:         NEXT
 7934: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7935: 	                         0, -1);
 7936: 	SKIP_BLANKS
 7937:     } 
 7938:     return(ret);
 7939: }
 7940: 
 7941: 
 7942: static xmlExpNodePtr
 7943: xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
 7944:     xmlExpNodePtr ret, right;
 7945: 
 7946:     ret = xmlExpParseOr(ctxt);
 7947:     SKIP_BLANKS
 7948:     while (CUR == '|') {
 7949:         NEXT
 7950: 	right = xmlExpParseOr(ctxt);
 7951: 	if (right == NULL) {
 7952: 	    xmlExpFree(ctxt, ret);
 7953: 	    return(NULL);
 7954: 	}
 7955: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
 7956: 	if (ret == NULL)
 7957: 	    return(NULL);
 7958:     }
 7959:     return(ret);
 7960: }
 7961: 
 7962: static xmlExpNodePtr
 7963: xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
 7964:     xmlExpNodePtr ret, right;
 7965: 
 7966:     ret = xmlExpParseSeq(ctxt);
 7967:     SKIP_BLANKS
 7968:     while (CUR == ',') {
 7969:         NEXT
 7970: 	right = xmlExpParseSeq(ctxt);
 7971: 	if (right == NULL) {
 7972: 	    xmlExpFree(ctxt, ret);
 7973: 	    return(NULL);
 7974: 	}
 7975: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
 7976: 	if (ret == NULL)
 7977: 	    return(NULL);
 7978:     }
 7979:     return(ret);
 7980: }
 7981: 
 7982: /**
 7983:  * xmlExpParse:
 7984:  * @ctxt: the expressions context
 7985:  * @expr: the 0 terminated string
 7986:  *
 7987:  * Minimal parser for regexps, it understand the following constructs
 7988:  *  - string terminals
 7989:  *  - choice operator |
 7990:  *  - sequence operator ,
 7991:  *  - subexpressions (...)
 7992:  *  - usual cardinality operators + * and ?
 7993:  *  - finite sequences  { min, max }
 7994:  *  - infinite sequences { min, * }
 7995:  * There is minimal checkings made especially no checking on strings values
 7996:  *
 7997:  * Returns a new expression or NULL in case of failure
 7998:  */
 7999: xmlExpNodePtr
 8000: xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
 8001:     xmlExpNodePtr ret;
 8002: 
 8003:     ctxt->expr = expr;
 8004:     ctxt->cur = expr;
 8005: 
 8006:     ret = xmlExpParseExpr(ctxt);
 8007:     SKIP_BLANKS
 8008:     if (*ctxt->cur != 0) {
 8009:         xmlExpFree(ctxt, ret);
 8010:         return(NULL);
 8011:     }
 8012:     return(ret);
 8013: }
 8014: 
 8015: static void
 8016: xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
 8017:     xmlExpNodePtr c;
 8018: 
 8019:     if (expr == NULL) return;
 8020:     if (glob) xmlBufferWriteChar(buf, "(");
 8021:     switch (expr->type) {
 8022:         case XML_EXP_EMPTY:
 8023: 	    xmlBufferWriteChar(buf, "empty");
 8024: 	    break;
 8025:         case XML_EXP_FORBID:
 8026: 	    xmlBufferWriteChar(buf, "forbidden");
 8027: 	    break;
 8028:         case XML_EXP_ATOM:
 8029: 	    xmlBufferWriteCHAR(buf, expr->exp_str);
 8030: 	    break;
 8031:         case XML_EXP_SEQ:
 8032: 	    c = expr->exp_left;
 8033: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8034: 	        xmlExpDumpInt(buf, c, 1);
 8035: 	    else
 8036: 	        xmlExpDumpInt(buf, c, 0);
 8037: 	    xmlBufferWriteChar(buf, " , ");
 8038: 	    c = expr->exp_right;
 8039: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8040: 	        xmlExpDumpInt(buf, c, 1);
 8041: 	    else
 8042: 	        xmlExpDumpInt(buf, c, 0);
 8043:             break;
 8044:         case XML_EXP_OR:
 8045: 	    c = expr->exp_left;
 8046: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8047: 	        xmlExpDumpInt(buf, c, 1);
 8048: 	    else
 8049: 	        xmlExpDumpInt(buf, c, 0);
 8050: 	    xmlBufferWriteChar(buf, " | ");
 8051: 	    c = expr->exp_right;
 8052: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8053: 	        xmlExpDumpInt(buf, c, 1);
 8054: 	    else
 8055: 	        xmlExpDumpInt(buf, c, 0);
 8056:             break;
 8057:         case XML_EXP_COUNT: {
 8058: 	    char rep[40];
 8059: 	    
 8060: 	    c = expr->exp_left;
 8061: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8062: 	        xmlExpDumpInt(buf, c, 1);
 8063: 	    else
 8064: 	        xmlExpDumpInt(buf, c, 0);
 8065: 	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
 8066: 		rep[0] = '?';
 8067: 		rep[1] = 0;
 8068: 	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
 8069: 		rep[0] = '*';
 8070: 		rep[1] = 0;
 8071: 	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
 8072: 		rep[0] = '+';
 8073: 		rep[1] = 0;
 8074: 	    } else if (expr->exp_max == expr->exp_min) {
 8075: 	        snprintf(rep, 39, "{%d}", expr->exp_min);
 8076: 	    } else if (expr->exp_max < 0) {
 8077: 	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
 8078: 	    } else {
 8079: 	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
 8080: 	    }
 8081: 	    rep[39] = 0;
 8082: 	    xmlBufferWriteChar(buf, rep);
 8083: 	    break;
 8084: 	}
 8085: 	default:
 8086: 	    fprintf(stderr, "Error in tree\n");
 8087:     }
 8088:     if (glob)
 8089:         xmlBufferWriteChar(buf, ")");
 8090: }
 8091: /**
 8092:  * xmlExpDump:
 8093:  * @buf:  a buffer to receive the output
 8094:  * @expr:  the compiled expression
 8095:  *
 8096:  * Serialize the expression as compiled to the buffer
 8097:  */
 8098: void
 8099: xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
 8100:     if ((buf == NULL) || (expr == NULL))
 8101:         return;
 8102:     xmlExpDumpInt(buf, expr, 0);
 8103: }
 8104: 
 8105: /**
 8106:  * xmlExpMaxToken:
 8107:  * @expr: a compiled expression
 8108:  *
 8109:  * Indicate the maximum number of input a expression can accept
 8110:  *
 8111:  * Returns the maximum length or -1 in case of error
 8112:  */
 8113: int
 8114: xmlExpMaxToken(xmlExpNodePtr expr) {
 8115:     if (expr == NULL)
 8116:         return(-1);
 8117:     return(expr->c_max);
 8118: }
 8119: 
 8120: /**
 8121:  * xmlExpCtxtNbNodes:
 8122:  * @ctxt: an expression context
 8123:  *
 8124:  * Debugging facility provides the number of allocated nodes at a that point
 8125:  *
 8126:  * Returns the number of nodes in use or -1 in case of error
 8127:  */
 8128: int
 8129: xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
 8130:     if (ctxt == NULL)
 8131:         return(-1);
 8132:     return(ctxt->nb_nodes);
 8133: }
 8134: 
 8135: /**
 8136:  * xmlExpCtxtNbCons:
 8137:  * @ctxt: an expression context
 8138:  *
 8139:  * Debugging facility provides the number of allocated nodes over lifetime
 8140:  *
 8141:  * Returns the number of nodes ever allocated or -1 in case of error
 8142:  */
 8143: int
 8144: xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
 8145:     if (ctxt == NULL)
 8146:         return(-1);
 8147:     return(ctxt->nb_cons);
 8148: }
 8149: 
 8150: #endif /* LIBXML_EXPR_ENABLED */
 8151: #define bottom_xmlregexp
 8152: #include "elfgcchack.h"
 8153: #endif /* LIBXML_REGEXP_ENABLED */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>