File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / xmlregexp.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:22 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD
2.8.0

    1: /*
    2:  * regexp.c: generic and extensible Regular Expression engine
    3:  *
    4:  * Basically designed with the purpose of compiling regexps for 
 * the variety of validation/schemas mechanisms now available in
    6:  * XML related specifications these include:
    7:  *    - XML-1.0 DTD validation
    8:  *    - XML Schemas structure part 1
    9:  *    - XML Schemas Datatypes part 2 especially Appendix F
   10:  *    - RELAX-NG/TREX i.e. the counter proposal
   11:  *
   12:  * See Copyright for the status of this software.
   13:  *
   14:  * Daniel Veillard <veillard@redhat.com>
   15:  */
   16: 
   17: #define IN_LIBXML
   18: #include "libxml.h"
   19: 
   20: #ifdef LIBXML_REGEXP_ENABLED
   21: 
   22: /* #define DEBUG_ERR */
   23: 
   24: #include <stdio.h>
   25: #include <string.h>
   26: #ifdef HAVE_LIMITS_H
   27: #include <limits.h>
   28: #endif
   29: 
   30: #include <libxml/tree.h>
   31: #include <libxml/parserInternals.h>
   32: #include <libxml/xmlregexp.h>
   33: #include <libxml/xmlautomata.h>
   34: #include <libxml/xmlunicode.h>
   35: 
   36: #ifndef INT_MAX
   37: #define INT_MAX 123456789 /* easy to flag and big enough for our needs */
   38: #endif
   39: 
   40: /* #define DEBUG_REGEXP_GRAPH */
   41: /* #define DEBUG_REGEXP_EXEC */
   42: /* #define DEBUG_PUSH */
   43: /* #define DEBUG_COMPACTION */
   44: 
/*
 * Numeric limit. Presumably caps the number of push operations tracked by
 * the nbPush field of the execution context - TODO confirm at the use site.
 */
#define MAX_PUSH 10000000

/* Drop any ERROR macro defined earlier so the local one below can be used. */
#ifdef ERROR
#undef ERROR
#endif
/*
 * ERROR(str): flag a compilation error on the parser context named `ctxt`
 * in the calling scope and report @str through xmlRegexpErrCompile().
 * NOTE: this expands to two statements and already ends with a ';', it is
 * not wrapped in do { } while (0), so it must not be used as the sole body
 * of an unbraced if/else.
 */
#define ERROR(str)							\
    ctxt->error = XML_REGEXP_COMPILE_ERROR;				\
    xmlRegexpErrCompile(ctxt, str);
/*
 * Cursor helpers. All of them operate on a parser context named `ctxt`
 * that must be in scope at the point of use.
 */
/* advance the cursor by one xmlChar */
#define NEXT ctxt->cur++
/* the xmlChar under the cursor */
#define CUR (*(ctxt->cur))
/* the xmlChar at offset @index from the cursor */
#define NXT(index) (ctxt->cur[index])

/* decode the character at @s with xmlStringCurrentChar(), length into @l */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
/* advance the cursor by @l xmlChars; the expansion includes its own ';' */
#define NEXTL(l) ctxt->cur += l;
#define XML_REG_STRING_SEPARATOR '|'
/*
 * Need PREV to check on a '-' within a Character Group. May only be used
 * when it's guaranteed that cur is not at the beginning of ctxt->string!
 */
#define PREV (ctxt->cur[-1])
   65: 
/**
 * TODO:
 *
 * Macro to flag unimplemented blocks: emits an "Unimplemented block"
 * message carrying the current file name and line number through
 * xmlGenericError(). The expansion already ends with a ';'.
 */
#define TODO 								\
    xmlGenericError(xmlGenericErrorContext,				\
	    "Unimplemented block at %s:%d\n",				\
            __FILE__, __LINE__);
   75: 
   76: /************************************************************************
   77:  * 									*
   78:  * 			Datatypes and structures			*
   79:  * 									*
   80:  ************************************************************************/
   81: 
/*
 * xmlRegAtomType: the kind of item an atom (or a range inside an atom)
 * stands for.
 *
 * Note: the order of the enums below is significant, do not shuffle
 *
 * Values 1..16 cover the structural kinds and the single letter escapes
 * noted next to each entry. Values from XML_REGEXP_LETTER (100) onward are
 * named after character categories - presumably the Unicode general
 * categories selectable from a regexp; confirm against the parser code.
 */
typedef enum {
    XML_REGEXP_EPSILON = 1,
    XML_REGEXP_CHARVAL,
    XML_REGEXP_RANGES,
    XML_REGEXP_SUBREG,  /* used for () sub regexps */
    XML_REGEXP_STRING,
    XML_REGEXP_ANYCHAR, /* . */
    XML_REGEXP_ANYSPACE, /* \s */
    XML_REGEXP_NOTSPACE, /* \S */
    XML_REGEXP_INITNAME, /* \l */
    XML_REGEXP_NOTINITNAME, /* \L */
    XML_REGEXP_NAMECHAR, /* \c */
    XML_REGEXP_NOTNAMECHAR, /* \C */
    XML_REGEXP_DECIMAL, /* \d */
    XML_REGEXP_NOTDECIMAL, /* \D */
    XML_REGEXP_REALCHAR, /* \w */
    XML_REGEXP_NOTREALCHAR, /* \W */
    XML_REGEXP_LETTER = 100,
    XML_REGEXP_LETTER_UPPERCASE,
    XML_REGEXP_LETTER_LOWERCASE,
    XML_REGEXP_LETTER_TITLECASE,
    XML_REGEXP_LETTER_MODIFIER,
    XML_REGEXP_LETTER_OTHERS,
    XML_REGEXP_MARK,
    XML_REGEXP_MARK_NONSPACING,
    XML_REGEXP_MARK_SPACECOMBINING,
    XML_REGEXP_MARK_ENCLOSING,
    XML_REGEXP_NUMBER,
    XML_REGEXP_NUMBER_DECIMAL,
    XML_REGEXP_NUMBER_LETTER,
    XML_REGEXP_NUMBER_OTHERS,
    XML_REGEXP_PUNCT,
    XML_REGEXP_PUNCT_CONNECTOR,
    XML_REGEXP_PUNCT_DASH,
    XML_REGEXP_PUNCT_OPEN,
    XML_REGEXP_PUNCT_CLOSE,
    XML_REGEXP_PUNCT_INITQUOTE,
    XML_REGEXP_PUNCT_FINQUOTE,
    XML_REGEXP_PUNCT_OTHERS,
    XML_REGEXP_SEPAR,
    XML_REGEXP_SEPAR_SPACE,
    XML_REGEXP_SEPAR_LINE,
    XML_REGEXP_SEPAR_PARA,
    XML_REGEXP_SYMBOL,
    XML_REGEXP_SYMBOL_MATH,
    XML_REGEXP_SYMBOL_CURRENCY,
    XML_REGEXP_SYMBOL_MODIFIER,
    XML_REGEXP_SYMBOL_OTHERS,
    XML_REGEXP_OTHER,
    XML_REGEXP_OTHER_CONTROL,
    XML_REGEXP_OTHER_FORMAT,
    XML_REGEXP_OTHER_PRIVATE,
    XML_REGEXP_OTHER_NA,
    XML_REGEXP_BLOCK_NAME
} xmlRegAtomType;
  140: 
/*
 * xmlRegQuantType: the quantifier attached to an atom. New atoms are
 * created with XML_REGEXP_QUANT_ONCE (see xmlRegNewAtom()).
 * NOTE(review): OPT/MULT/PLUS/RANGE presumably map to '?', '*', '+' and
 * '{min,max}' - confirm against the parser.
 */
typedef enum {
    XML_REGEXP_QUANT_EPSILON = 1,
    XML_REGEXP_QUANT_ONCE,
    XML_REGEXP_QUANT_OPT,
    XML_REGEXP_QUANT_MULT,
    XML_REGEXP_QUANT_PLUS,
    XML_REGEXP_QUANT_ONCEONLY,
    XML_REGEXP_QUANT_ALL,
    XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;
  151: 
/*
 * xmlRegStateType: the role of a state in the automata. New states are
 * created as XML_REGEXP_TRANS_STATE (see xmlRegNewState()). The value is
 * also stored in the first column of the compact transition table.
 */
typedef enum {
    XML_REGEXP_START_STATE = 1,
    XML_REGEXP_FINAL_STATE,
    XML_REGEXP_TRANS_STATE,
    XML_REGEXP_SINK_STATE,
    XML_REGEXP_UNREACH_STATE
} xmlRegStateType;
  159: 
/*
 * xmlRegMarkedType: marker values kept in the `mark` and `reached` fields
 * of a state; new states start as XML_REGEXP_MARK_NORMAL.
 * NOTE(review): presumably used while walking the state graph - confirm.
 */
typedef enum {
    XML_REGEXP_MARK_NORMAL = 0,
    XML_REGEXP_MARK_START,
    XML_REGEXP_MARK_VISITED
} xmlRegMarkedType;
  165: 
typedef struct _xmlRegRange xmlRegRange;
typedef xmlRegRange *xmlRegRangePtr;

/*
 * One range inside an atom. The structure owns blockName, which is
 * released by xmlRegFreeRange().
 */
struct _xmlRegRange {
    int neg;		/* 0 normal, 1 not, 2 exclude */
    xmlRegAtomType type;	/* the kind of range */
    int start;		/* the start codepoint */
    int end;		/* the end codepoint */
    xmlChar *blockName;	/* owned string or NULL */
};
  176: 
typedef struct _xmlRegAtom xmlRegAtom;
typedef xmlRegAtom *xmlRegAtomPtr;

typedef struct _xmlAutomataState xmlRegState;
typedef xmlRegState *xmlRegStatePtr;

/*
 * An atom: the item labelling a transition. It owns its ranges array and,
 * for STRING and BLOCK_NAME atoms, the valuep/valuep2 buffers (see
 * xmlRegFreeAtom()).
 */
struct _xmlRegAtom {
    int no;		/* used as an index into per-atom tables when
			   building the compact form */
    xmlRegAtomType type;
    xmlRegQuantType quant;
    int min;
    int max;

    void *valuep;	/* for STRING atoms, the string value */
    void *valuep2;
    int neg;
    int codepoint;
    xmlRegStatePtr start;
    xmlRegStatePtr start0;
    xmlRegStatePtr stop;
    int maxRanges;
    int nbRanges;	/* number of valid entries in ranges */
    xmlRegRangePtr *ranges;
    void *data;		/* copied into the transdata table of the
			   compact form */
};
  202: 
typedef struct _xmlRegCounter xmlRegCounter;
typedef xmlRegCounter *xmlRegCounterPtr;

/*
 * A counter attached to the automata, described by a pair of bounds.
 * NOTE(review): presumably the allowed minimum and maximum number of
 * occurrences - confirm at the use sites.
 */
struct _xmlRegCounter {
    int min;
    int max;
};
  210: 
typedef struct _xmlRegTrans xmlRegTrans;
typedef xmlRegTrans *xmlRegTransPtr;

/*
 * A transition leaving a state. Transitions whose `to` is -1 or whose
 * atom is NULL are skipped when the compact table is built.
 */
struct _xmlRegTrans {
    xmlRegAtomPtr atom;	/* the atom labelling the transition, may be NULL */
    int to;		/* index of the target state in the states array */
    int counter;
    int count;
    int nd;
};
  221: 
/*
 * A state of the automata. It owns the trans and transTo arrays, both
 * released by xmlRegFreeState().
 */
struct _xmlAutomataState {
    xmlRegStateType type;
    xmlRegMarkedType mark;
    xmlRegMarkedType reached;
    int no;
    int maxTrans;
    int nbTrans;	/* number of valid entries in trans */
    xmlRegTrans *trans;
    /*  knowing states pointing to us can speed things up */
    int maxTransTo;
    int nbTransTo;
    int *transTo;
};
  235: 
typedef struct _xmlAutomata xmlRegParserCtxt;
typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;

/* NOTE(review): presumably a bit stored in the `flags` field - confirm. */
#define AM_AUTOMATA_RNG 1

/*
 * The regexp parser context, also used as the automata type. It owns the
 * string, the states, the atoms and the counters until they are handed
 * over to a compiled regexp by xmlRegEpxFromParse(); whatever is left is
 * released by xmlRegFreeParserCtxt().
 */
struct _xmlAutomata {
    xmlChar *string;	/* private copy of the expression, may be NULL */
    xmlChar *cur;	/* the parsing cursor within string */

    int error;		/* last error code, 0 when none */
    int neg;

    xmlRegStatePtr start;
    xmlRegStatePtr end;
    xmlRegStatePtr state;

    xmlRegAtomPtr atom;

    int maxAtoms;
    int nbAtoms;	/* number of valid entries in atoms */
    xmlRegAtomPtr *atoms;

    int maxStates;
    int nbStates;	/* number of valid entries in states */
    xmlRegStatePtr *states;

    int maxCounters;
    int nbCounters;
    xmlRegCounter *counters;

    int determinist;	/* initialized to -1 by xmlRegNewParserCtxt() */
    int negs;		/* must be 0 for the compact form to be built */
    int flags;
};
  270: 
/*
 * A compiled regexp as built by xmlRegEpxFromParse(). It holds either the
 * states/atoms form taken over from the parser context, or the compact
 * table form, in which case states and atoms have been released and set
 * to NULL.
 */
struct _xmlRegexp {
    xmlChar *string;
    int nbStates;
    xmlRegStatePtr *states;
    int nbAtoms;
    xmlRegAtomPtr *atoms;
    int nbCounters;
    xmlRegCounter *counters;
    int determinist;
    int flags;
    /*
     * That's the compact form for determinists automatas
     */
    int nbstates;	/* number of rows of the compact table */
    int *compact;	/* nbstates rows of (nbstrings + 1) ints: the state
			   type, then target state + 1 per string (0 = none) */
    void **transdata;	/* optional nbstates x nbstrings table of atom data */
    int nbstrings;	/* number of entries in stringMap */
    xmlChar **stringMap;	/* the distinct strings labelling transitions */
};
  290: 
typedef struct _xmlRegExecRollback xmlRegExecRollback;
typedef xmlRegExecRollback *xmlRegExecRollbackPtr;

/*
 * One entry of the rollback stack kept by the execution context.
 */
struct _xmlRegExecRollback {
    xmlRegStatePtr state;/* the current state */
    int index;		/* the index in the input stack */
    int nextbranch;	/* the next transition to explore in that state */
    int *counts;	/* save the automata state if it has some */
};
  300: 
typedef struct _xmlRegInputToken xmlRegInputToken;
typedef xmlRegInputToken *xmlRegInputTokenPtr;

/*
 * One entry of the input stack used when operating on strings (see the
 * inputStack field of the execution context): a value and its data.
 */
struct _xmlRegInputToken {
    xmlChar *value;
    void *data;
};
  308: 
/*
 * The execution context: the state of one run of a compiled regexp
 * against some input, including the rollback stack, the input stack and
 * the information kept about the last error.
 */
struct _xmlRegExecCtxt {
    int status;		/* execution status != 0 indicate an error */
    int determinist;	/* did we find an indeterministic behaviour */
    xmlRegexpPtr comp;	/* the compiled regexp */
    xmlRegExecCallbacks callback;
    void *data;

    xmlRegStatePtr state;/* the current state */
    int transno;	/* the current transition on that state */
    int transcount;	/* the number of chars in char counted transitions */

    /*
     * A stack of rollback states
     */
    int maxRollbacks;
    int nbRollbacks;
    xmlRegExecRollback *rollbacks;

    /*
     * The state of the automata if any
     */
    int *counts;

    /*
     * The input stack
     */
    int inputStackMax;
    int inputStackNr;
    int index;
    int *charStack;
    const xmlChar *inputString; /* when operating on characters */
    xmlRegInputTokenPtr inputStack;/* when operating on strings */

    /*
     * error handling
     */
    int errStateNo;		/* the error state number */
    xmlRegStatePtr errState;    /* the error state */
    xmlChar *errString;		/* the string raising the error */
    int *errCounts;		/* counters at the error state */
    int nbPush;
};
  351: 
/*
 * Reserved counter values.
 * NOTE(review): presumably stored in a transition's counter field to mark
 * "all" style transitions - confirm at the use sites.
 */
#define REGEXP_ALL_COUNTER	0x123456
#define REGEXP_ALL_LAX_COUNTER	0x123457

/* Forward declarations of routines used before their definition. */
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
static void xmlRegFreeState(xmlRegStatePtr state);
static void xmlRegFreeAtom(xmlRegAtomPtr atom);
static int xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr);
static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
                  int neg, int start, int end, const xmlChar *blockName);

/* Non static: declared here so it can be referenced within this file. */
void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
  364: 
  365: /************************************************************************
  366:  *									*
  367:  * 		Regexp memory error handler				*
  368:  *									*
  369:  ************************************************************************/
  370: /**
  371:  * xmlRegexpErrMemory:
  372:  * @extra:  extra information
  373:  *
  374:  * Handle an out of memory condition
  375:  */
  376: static void
  377: xmlRegexpErrMemory(xmlRegParserCtxtPtr ctxt, const char *extra)
  378: {
  379:     const char *regexp = NULL;
  380:     if (ctxt != NULL) {
  381:         regexp = (const char *) ctxt->string;
  382: 	ctxt->error = XML_ERR_NO_MEMORY;
  383:     }
  384:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  385: 		    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
  386: 		    regexp, NULL, 0, 0,
  387: 		    "Memory allocation failed : %s\n", extra);
  388: }
  389: 
  390: /**
  391:  * xmlRegexpErrCompile:
  392:  * @extra:  extra information
  393:  *
  394:  * Handle a compilation failure
  395:  */
  396: static void
  397: xmlRegexpErrCompile(xmlRegParserCtxtPtr ctxt, const char *extra)
  398: {
  399:     const char *regexp = NULL;
  400:     int idx = 0;
  401: 
  402:     if (ctxt != NULL) {
  403:         regexp = (const char *) ctxt->string;
  404: 	idx = ctxt->cur - ctxt->string;
  405: 	ctxt->error = XML_REGEXP_COMPILE_ERROR;
  406:     }
  407:     __xmlRaiseError(NULL, NULL, NULL, NULL, NULL, XML_FROM_REGEXP,
  408: 		    XML_REGEXP_COMPILE_ERROR, XML_ERR_FATAL, NULL, 0, extra,
  409: 		    regexp, NULL, idx, 0,
  410: 		    "failed to compile: %s\n", extra);
  411: }
  412: 
  413: /************************************************************************
  414:  * 									*
  415:  * 			Allocation/Deallocation				*
  416:  * 									*
  417:  ************************************************************************/
  418: 
  419: static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
  420: /**
  421:  * xmlRegEpxFromParse:
  422:  * @ctxt:  the parser context used to build it
  423:  *
  424:  * Allocate a new regexp and fill it with the result from the parser
  425:  *
  426:  * Returns the new regexp or NULL in case of error
  427:  */
  428: static xmlRegexpPtr
  429: xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
  430:     xmlRegexpPtr ret;
  431: 
  432:     ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
  433:     if (ret == NULL) {
  434: 	xmlRegexpErrMemory(ctxt, "compiling regexp");
  435: 	return(NULL);
  436:     }
  437:     memset(ret, 0, sizeof(xmlRegexp));
  438:     ret->string = ctxt->string;
  439:     ret->nbStates = ctxt->nbStates;
  440:     ret->states = ctxt->states;
  441:     ret->nbAtoms = ctxt->nbAtoms;
  442:     ret->atoms = ctxt->atoms;
  443:     ret->nbCounters = ctxt->nbCounters;
  444:     ret->counters = ctxt->counters;
  445:     ret->determinist = ctxt->determinist;
  446:     ret->flags = ctxt->flags;
  447:     if (ret->determinist == -1) {
  448:         xmlRegexpIsDeterminist(ret);
  449:     }
  450: 
  451:     if ((ret->determinist != 0) &&
  452: 	(ret->nbCounters == 0) &&
  453: 	(ctxt->negs == 0) &&
  454: 	(ret->atoms != NULL) &&
  455: 	(ret->atoms[0] != NULL) &&
  456: 	(ret->atoms[0]->type == XML_REGEXP_STRING)) {
  457: 	int i, j, nbstates = 0, nbatoms = 0;
  458: 	int *stateRemap;
  459: 	int *stringRemap;
  460: 	int *transitions;
  461: 	void **transdata;
  462: 	xmlChar **stringMap;
  463:         xmlChar *value;
  464: 
  465: 	/*
  466: 	 * Switch to a compact representation
  467: 	 * 1/ counting the effective number of states left
  468: 	 * 2/ counting the unique number of atoms, and check that
  469: 	 *    they are all of the string type
  470: 	 * 3/ build a table state x atom for the transitions
  471: 	 */
  472: 
  473: 	stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
  474: 	if (stateRemap == NULL) {
  475: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  476: 	    xmlFree(ret);
  477: 	    return(NULL);
  478: 	}
  479: 	for (i = 0;i < ret->nbStates;i++) {
  480: 	    if (ret->states[i] != NULL) {
  481: 		stateRemap[i] = nbstates;
  482: 		nbstates++;
  483: 	    } else {
  484: 		stateRemap[i] = -1;
  485: 	    }
  486: 	}
  487: #ifdef DEBUG_COMPACTION
  488: 	printf("Final: %d states\n", nbstates);
  489: #endif
  490: 	stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
  491: 	if (stringMap == NULL) {
  492: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  493: 	    xmlFree(stateRemap);
  494: 	    xmlFree(ret);
  495: 	    return(NULL);
  496: 	}
  497: 	stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
  498: 	if (stringRemap == NULL) {
  499: 	    xmlRegexpErrMemory(ctxt, "compiling regexp");
  500: 	    xmlFree(stringMap);
  501: 	    xmlFree(stateRemap);
  502: 	    xmlFree(ret);
  503: 	    return(NULL);
  504: 	}
  505: 	for (i = 0;i < ret->nbAtoms;i++) {
  506: 	    if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
  507: 		(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
  508: 		value = ret->atoms[i]->valuep;
  509:                 for (j = 0;j < nbatoms;j++) {
  510: 		    if (xmlStrEqual(stringMap[j], value)) {
  511: 			stringRemap[i] = j;
  512: 			break;
  513: 		    }
  514: 		}
  515: 		if (j >= nbatoms) {
  516: 		    stringRemap[i] = nbatoms;
  517: 		    stringMap[nbatoms] = xmlStrdup(value);
  518: 		    if (stringMap[nbatoms] == NULL) {
  519: 			for (i = 0;i < nbatoms;i++)
  520: 			    xmlFree(stringMap[i]);
  521: 			xmlFree(stringRemap);
  522: 			xmlFree(stringMap);
  523: 			xmlFree(stateRemap);
  524: 			xmlFree(ret);
  525: 			return(NULL);
  526: 		    }
  527: 		    nbatoms++;
  528: 		}
  529: 	    } else {
  530: 		xmlFree(stateRemap);
  531: 		xmlFree(stringRemap);
  532: 		for (i = 0;i < nbatoms;i++)
  533: 		    xmlFree(stringMap[i]);
  534: 		xmlFree(stringMap);
  535: 		xmlFree(ret);
  536: 		return(NULL);
  537: 	    }
  538: 	}
  539: #ifdef DEBUG_COMPACTION
  540: 	printf("Final: %d atoms\n", nbatoms);
  541: #endif
  542: 	transitions = (int *) xmlMalloc((nbstates + 1) *
  543: 	                                (nbatoms + 1) * sizeof(int));
  544: 	if (transitions == NULL) {
  545: 	    xmlFree(stateRemap);
  546: 	    xmlFree(stringRemap);
  547: 	    xmlFree(stringMap);
  548: 	    xmlFree(ret);
  549: 	    return(NULL);
  550: 	}
  551: 	memset(transitions, 0, (nbstates + 1) * (nbatoms + 1) * sizeof(int));
  552: 
  553: 	/*
  554: 	 * Allocate the transition table. The first entry for each
  555: 	 * state corresponds to the state type.
  556: 	 */
  557: 	transdata = NULL;
  558: 
  559: 	for (i = 0;i < ret->nbStates;i++) {
  560: 	    int stateno, atomno, targetno, prev;
  561: 	    xmlRegStatePtr state;
  562: 	    xmlRegTransPtr trans;
  563: 
  564: 	    stateno = stateRemap[i];
  565: 	    if (stateno == -1)
  566: 		continue;
  567: 	    state = ret->states[i];
  568: 
  569: 	    transitions[stateno * (nbatoms + 1)] = state->type;
  570: 
  571: 	    for (j = 0;j < state->nbTrans;j++) {
  572: 		trans = &(state->trans[j]);
  573: 		if ((trans->to == -1) || (trans->atom == NULL))
  574: 		    continue;
  575:                 atomno = stringRemap[trans->atom->no];
  576: 		if ((trans->atom->data != NULL) && (transdata == NULL)) {
  577: 		    transdata = (void **) xmlMalloc(nbstates * nbatoms *
  578: 			                            sizeof(void *));
  579: 		    if (transdata != NULL)
  580: 			memset(transdata, 0,
  581: 			       nbstates * nbatoms * sizeof(void *));
  582: 		    else {
  583: 			xmlRegexpErrMemory(ctxt, "compiling regexp");
  584: 			break;
  585: 		    }
  586: 		}
  587: 		targetno = stateRemap[trans->to];
  588: 		/*
  589: 		 * if the same atom can generate transitions to 2 different
  590: 		 * states then it means the automata is not determinist and
  591: 		 * the compact form can't be used !
  592: 		 */
  593: 		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
  594: 		if (prev != 0) {
  595: 		    if (prev != targetno + 1) {
  596: 			ret->determinist = 0;
  597: #ifdef DEBUG_COMPACTION
  598: 			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
  599: 			       i, j, trans->atom->no, trans->to, atomno, targetno);
  600: 			printf("       previous to is %d\n", prev);
  601: #endif
  602: 			if (transdata != NULL)
  603: 			    xmlFree(transdata);
  604: 			xmlFree(transitions);
  605: 			xmlFree(stateRemap);
  606: 			xmlFree(stringRemap);
  607: 			for (i = 0;i < nbatoms;i++)
  608: 			    xmlFree(stringMap[i]);
  609: 			xmlFree(stringMap);
  610: 			goto not_determ;
  611: 		    }
  612: 		} else {
  613: #if 0
  614: 		    printf("State %d trans %d: atom %d to %d : %d to %d\n",
  615: 			   i, j, trans->atom->no, trans->to, atomno, targetno);
  616: #endif
  617: 		    transitions[stateno * (nbatoms + 1) + atomno + 1] =
  618: 			targetno + 1; /* to avoid 0 */
  619: 		    if (transdata != NULL)
  620: 			transdata[stateno * nbatoms + atomno] =
  621: 			    trans->atom->data;
  622: 		}
  623: 	    }
  624: 	}
  625: 	ret->determinist = 1;
  626: #ifdef DEBUG_COMPACTION
  627: 	/*
  628: 	 * Debug
  629: 	 */
  630: 	for (i = 0;i < nbstates;i++) {
  631: 	    for (j = 0;j < nbatoms + 1;j++) {
  632:                 printf("%02d ", transitions[i * (nbatoms + 1) + j]);
  633: 	    }
  634: 	    printf("\n");
  635: 	}
  636: 	printf("\n");
  637: #endif
  638: 	/*
  639: 	 * Cleanup of the old data
  640: 	 */
  641: 	if (ret->states != NULL) {
  642: 	    for (i = 0;i < ret->nbStates;i++)
  643: 		xmlRegFreeState(ret->states[i]);
  644: 	    xmlFree(ret->states);
  645: 	}
  646: 	ret->states = NULL;
  647: 	ret->nbStates = 0;
  648: 	if (ret->atoms != NULL) {
  649: 	    for (i = 0;i < ret->nbAtoms;i++)
  650: 		xmlRegFreeAtom(ret->atoms[i]);
  651: 	    xmlFree(ret->atoms);
  652: 	}
  653: 	ret->atoms = NULL;
  654: 	ret->nbAtoms = 0;
  655: 
  656: 	ret->compact = transitions;
  657: 	ret->transdata = transdata;
  658: 	ret->stringMap = stringMap;
  659: 	ret->nbstrings = nbatoms;
  660: 	ret->nbstates = nbstates;
  661: 	xmlFree(stateRemap);
  662: 	xmlFree(stringRemap);
  663:     }
  664: not_determ:
  665:     ctxt->string = NULL;
  666:     ctxt->nbStates = 0;
  667:     ctxt->states = NULL;
  668:     ctxt->nbAtoms = 0;
  669:     ctxt->atoms = NULL;
  670:     ctxt->nbCounters = 0;
  671:     ctxt->counters = NULL;
  672:     return(ret);
  673: }
  674: 
  675: /**
  676:  * xmlRegNewParserCtxt:
  677:  * @string:  the string to parse
  678:  *
  679:  * Allocate a new regexp parser context
  680:  *
  681:  * Returns the new context or NULL in case of error
  682:  */
  683: static xmlRegParserCtxtPtr
  684: xmlRegNewParserCtxt(const xmlChar *string) {
  685:     xmlRegParserCtxtPtr ret;
  686: 
  687:     ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
  688:     if (ret == NULL)
  689: 	return(NULL);
  690:     memset(ret, 0, sizeof(xmlRegParserCtxt));
  691:     if (string != NULL)
  692: 	ret->string = xmlStrdup(string);
  693:     ret->cur = ret->string;
  694:     ret->neg = 0;
  695:     ret->negs = 0;
  696:     ret->error = 0;
  697:     ret->determinist = -1;
  698:     return(ret);
  699: }
  700: 
  701: /**
  702:  * xmlRegNewRange:
  703:  * @ctxt:  the regexp parser context
  704:  * @neg:  is that negative
  705:  * @type:  the type of range
  706:  * @start:  the start codepoint
  707:  * @end:  the end codepoint
  708:  *
  709:  * Allocate a new regexp range
  710:  *
  711:  * Returns the new range or NULL in case of error
  712:  */
  713: static xmlRegRangePtr
  714: xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
  715: 	       int neg, xmlRegAtomType type, int start, int end) {
  716:     xmlRegRangePtr ret;
  717: 
  718:     ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
  719:     if (ret == NULL) {
  720: 	xmlRegexpErrMemory(ctxt, "allocating range");
  721: 	return(NULL);
  722:     }
  723:     ret->neg = neg;
  724:     ret->type = type;
  725:     ret->start = start;
  726:     ret->end = end;
  727:     return(ret);
  728: }
  729: 
  730: /**
  731:  * xmlRegFreeRange:
  732:  * @range:  the regexp range
  733:  *
  734:  * Free a regexp range
  735:  */
  736: static void
  737: xmlRegFreeRange(xmlRegRangePtr range) {
  738:     if (range == NULL)
  739: 	return;
  740: 
  741:     if (range->blockName != NULL)
  742: 	xmlFree(range->blockName);
  743:     xmlFree(range);
  744: }
  745: 
  746: /**
  747:  * xmlRegCopyRange:
  748:  * @range:  the regexp range
  749:  *
  750:  * Copy a regexp range
  751:  *
  752:  * Returns the new copy or NULL in case of error.
  753:  */
  754: static xmlRegRangePtr
  755: xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
  756:     xmlRegRangePtr ret;
  757: 
  758:     if (range == NULL)
  759: 	return(NULL);
  760: 
  761:     ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
  762:                          range->end);
  763:     if (ret == NULL)
  764:         return(NULL);
  765:     if (range->blockName != NULL) {
  766: 	ret->blockName = xmlStrdup(range->blockName);
  767: 	if (ret->blockName == NULL) {
  768: 	    xmlRegexpErrMemory(ctxt, "allocating range");
  769: 	    xmlRegFreeRange(ret);
  770: 	    return(NULL);
  771: 	}
  772:     }
  773:     return(ret);
  774: }
  775: 
  776: /**
  777:  * xmlRegNewAtom:
  778:  * @ctxt:  the regexp parser context
  779:  * @type:  the type of atom
  780:  *
  781:  * Allocate a new atom
  782:  *
  783:  * Returns the new atom or NULL in case of error
  784:  */
  785: static xmlRegAtomPtr
  786: xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
  787:     xmlRegAtomPtr ret;
  788: 
  789:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  790:     if (ret == NULL) {
  791: 	xmlRegexpErrMemory(ctxt, "allocating atom");
  792: 	return(NULL);
  793:     }
  794:     memset(ret, 0, sizeof(xmlRegAtom));
  795:     ret->type = type;
  796:     ret->quant = XML_REGEXP_QUANT_ONCE;
  797:     ret->min = 0;
  798:     ret->max = 0;
  799:     return(ret);
  800: }
  801: 
  802: /**
  803:  * xmlRegFreeAtom:
  804:  * @atom:  the regexp atom
  805:  *
  806:  * Free a regexp atom
  807:  */
  808: static void
  809: xmlRegFreeAtom(xmlRegAtomPtr atom) {
  810:     int i;
  811: 
  812:     if (atom == NULL)
  813: 	return;
  814: 
  815:     for (i = 0;i < atom->nbRanges;i++)
  816: 	xmlRegFreeRange(atom->ranges[i]);
  817:     if (atom->ranges != NULL)
  818: 	xmlFree(atom->ranges);
  819:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
  820: 	xmlFree(atom->valuep);
  821:     if ((atom->type == XML_REGEXP_STRING) && (atom->valuep2 != NULL))
  822: 	xmlFree(atom->valuep2);
  823:     if ((atom->type == XML_REGEXP_BLOCK_NAME) && (atom->valuep != NULL))
  824: 	xmlFree(atom->valuep);
  825:     xmlFree(atom);
  826: }
  827: 
  828: /**
  829:  * xmlRegCopyAtom:
  830:  * @ctxt:  the regexp parser context
  831:  * @atom:  the oiginal atom
  832:  *
  833:  * Allocate a new regexp range
  834:  *
  835:  * Returns the new atom or NULL in case of error
  836:  */
  837: static xmlRegAtomPtr
  838: xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
  839:     xmlRegAtomPtr ret;
  840: 
  841:     ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
  842:     if (ret == NULL) {
  843: 	xmlRegexpErrMemory(ctxt, "copying atom");
  844: 	return(NULL);
  845:     }
  846:     memset(ret, 0, sizeof(xmlRegAtom));
  847:     ret->type = atom->type;
  848:     ret->quant = atom->quant;
  849:     ret->min = atom->min;
  850:     ret->max = atom->max;
  851:     if (atom->nbRanges > 0) {
  852:         int i;
  853: 
  854:         ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
  855: 	                                           atom->nbRanges);
  856: 	if (ret->ranges == NULL) {
  857: 	    xmlRegexpErrMemory(ctxt, "copying atom");
  858: 	    goto error;
  859: 	}
  860: 	for (i = 0;i < atom->nbRanges;i++) {
  861: 	    ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
  862: 	    if (ret->ranges[i] == NULL)
  863: 	        goto error;
  864: 	    ret->nbRanges = i + 1;
  865: 	}
  866:     }
  867:     return(ret);
  868: 
  869: error:
  870:     xmlRegFreeAtom(ret);
  871:     return(NULL);
  872: }
  873: 
  874: static xmlRegStatePtr
  875: xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
  876:     xmlRegStatePtr ret;
  877: 
  878:     ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
  879:     if (ret == NULL) {
  880: 	xmlRegexpErrMemory(ctxt, "allocating state");
  881: 	return(NULL);
  882:     }
  883:     memset(ret, 0, sizeof(xmlRegState));
  884:     ret->type = XML_REGEXP_TRANS_STATE;
  885:     ret->mark = XML_REGEXP_MARK_NORMAL;
  886:     return(ret);
  887: }
  888: 
  889: /**
  890:  * xmlRegFreeState:
  891:  * @state:  the regexp state
  892:  *
  893:  * Free a regexp state
  894:  */
  895: static void
  896: xmlRegFreeState(xmlRegStatePtr state) {
  897:     if (state == NULL)
  898: 	return;
  899: 
  900:     if (state->trans != NULL)
  901: 	xmlFree(state->trans);
  902:     if (state->transTo != NULL)
  903: 	xmlFree(state->transTo);
  904:     xmlFree(state);
  905: }
  906: 
  907: /**
  908:  * xmlRegFreeParserCtxt:
  909:  * @ctxt:  the regexp parser context
  910:  *
  911:  * Free a regexp parser context
  912:  */
  913: static void
  914: xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
  915:     int i;
  916:     if (ctxt == NULL)
  917: 	return;
  918: 
  919:     if (ctxt->string != NULL)
  920: 	xmlFree(ctxt->string);
  921:     if (ctxt->states != NULL) {
  922: 	for (i = 0;i < ctxt->nbStates;i++)
  923: 	    xmlRegFreeState(ctxt->states[i]);
  924: 	xmlFree(ctxt->states);
  925:     }
  926:     if (ctxt->atoms != NULL) {
  927: 	for (i = 0;i < ctxt->nbAtoms;i++)
  928: 	    xmlRegFreeAtom(ctxt->atoms[i]);
  929: 	xmlFree(ctxt->atoms);
  930:     }
  931:     if (ctxt->counters != NULL)
  932: 	xmlFree(ctxt->counters);
  933:     xmlFree(ctxt);
  934: }
  935: 
  936: /************************************************************************
  937:  * 									*
  938:  * 			Display of Data structures			*
  939:  * 									*
  940:  ************************************************************************/
  941: 
  942: static void
  943: xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
  944:     switch (type) {
  945:         case XML_REGEXP_EPSILON:
  946: 	    fprintf(output, "epsilon "); break;
  947:         case XML_REGEXP_CHARVAL:
  948: 	    fprintf(output, "charval "); break;
  949:         case XML_REGEXP_RANGES:
  950: 	    fprintf(output, "ranges "); break;
  951:         case XML_REGEXP_SUBREG:
  952: 	    fprintf(output, "subexpr "); break;
  953:         case XML_REGEXP_STRING:
  954: 	    fprintf(output, "string "); break;
  955:         case XML_REGEXP_ANYCHAR:
  956: 	    fprintf(output, "anychar "); break;
  957:         case XML_REGEXP_ANYSPACE:
  958: 	    fprintf(output, "anyspace "); break;
  959:         case XML_REGEXP_NOTSPACE:
  960: 	    fprintf(output, "notspace "); break;
  961:         case XML_REGEXP_INITNAME:
  962: 	    fprintf(output, "initname "); break;
  963:         case XML_REGEXP_NOTINITNAME:
  964: 	    fprintf(output, "notinitname "); break;
  965:         case XML_REGEXP_NAMECHAR:
  966: 	    fprintf(output, "namechar "); break;
  967:         case XML_REGEXP_NOTNAMECHAR:
  968: 	    fprintf(output, "notnamechar "); break;
  969:         case XML_REGEXP_DECIMAL:
  970: 	    fprintf(output, "decimal "); break;
  971:         case XML_REGEXP_NOTDECIMAL:
  972: 	    fprintf(output, "notdecimal "); break;
  973:         case XML_REGEXP_REALCHAR:
  974: 	    fprintf(output, "realchar "); break;
  975:         case XML_REGEXP_NOTREALCHAR:
  976: 	    fprintf(output, "notrealchar "); break;
  977:         case XML_REGEXP_LETTER:
  978:             fprintf(output, "LETTER "); break;
  979:         case XML_REGEXP_LETTER_UPPERCASE:
  980:             fprintf(output, "LETTER_UPPERCASE "); break;
  981:         case XML_REGEXP_LETTER_LOWERCASE:
  982:             fprintf(output, "LETTER_LOWERCASE "); break;
  983:         case XML_REGEXP_LETTER_TITLECASE:
  984:             fprintf(output, "LETTER_TITLECASE "); break;
  985:         case XML_REGEXP_LETTER_MODIFIER:
  986:             fprintf(output, "LETTER_MODIFIER "); break;
  987:         case XML_REGEXP_LETTER_OTHERS:
  988:             fprintf(output, "LETTER_OTHERS "); break;
  989:         case XML_REGEXP_MARK:
  990:             fprintf(output, "MARK "); break;
  991:         case XML_REGEXP_MARK_NONSPACING:
  992:             fprintf(output, "MARK_NONSPACING "); break;
  993:         case XML_REGEXP_MARK_SPACECOMBINING:
  994:             fprintf(output, "MARK_SPACECOMBINING "); break;
  995:         case XML_REGEXP_MARK_ENCLOSING:
  996:             fprintf(output, "MARK_ENCLOSING "); break;
  997:         case XML_REGEXP_NUMBER:
  998:             fprintf(output, "NUMBER "); break;
  999:         case XML_REGEXP_NUMBER_DECIMAL:
 1000:             fprintf(output, "NUMBER_DECIMAL "); break;
 1001:         case XML_REGEXP_NUMBER_LETTER:
 1002:             fprintf(output, "NUMBER_LETTER "); break;
 1003:         case XML_REGEXP_NUMBER_OTHERS:
 1004:             fprintf(output, "NUMBER_OTHERS "); break;
 1005:         case XML_REGEXP_PUNCT:
 1006:             fprintf(output, "PUNCT "); break;
 1007:         case XML_REGEXP_PUNCT_CONNECTOR:
 1008:             fprintf(output, "PUNCT_CONNECTOR "); break;
 1009:         case XML_REGEXP_PUNCT_DASH:
 1010:             fprintf(output, "PUNCT_DASH "); break;
 1011:         case XML_REGEXP_PUNCT_OPEN:
 1012:             fprintf(output, "PUNCT_OPEN "); break;
 1013:         case XML_REGEXP_PUNCT_CLOSE:
 1014:             fprintf(output, "PUNCT_CLOSE "); break;
 1015:         case XML_REGEXP_PUNCT_INITQUOTE:
 1016:             fprintf(output, "PUNCT_INITQUOTE "); break;
 1017:         case XML_REGEXP_PUNCT_FINQUOTE:
 1018:             fprintf(output, "PUNCT_FINQUOTE "); break;
 1019:         case XML_REGEXP_PUNCT_OTHERS:
 1020:             fprintf(output, "PUNCT_OTHERS "); break;
 1021:         case XML_REGEXP_SEPAR:
 1022:             fprintf(output, "SEPAR "); break;
 1023:         case XML_REGEXP_SEPAR_SPACE:
 1024:             fprintf(output, "SEPAR_SPACE "); break;
 1025:         case XML_REGEXP_SEPAR_LINE:
 1026:             fprintf(output, "SEPAR_LINE "); break;
 1027:         case XML_REGEXP_SEPAR_PARA:
 1028:             fprintf(output, "SEPAR_PARA "); break;
 1029:         case XML_REGEXP_SYMBOL:
 1030:             fprintf(output, "SYMBOL "); break;
 1031:         case XML_REGEXP_SYMBOL_MATH:
 1032:             fprintf(output, "SYMBOL_MATH "); break;
 1033:         case XML_REGEXP_SYMBOL_CURRENCY:
 1034:             fprintf(output, "SYMBOL_CURRENCY "); break;
 1035:         case XML_REGEXP_SYMBOL_MODIFIER:
 1036:             fprintf(output, "SYMBOL_MODIFIER "); break;
 1037:         case XML_REGEXP_SYMBOL_OTHERS:
 1038:             fprintf(output, "SYMBOL_OTHERS "); break;
 1039:         case XML_REGEXP_OTHER:
 1040:             fprintf(output, "OTHER "); break;
 1041:         case XML_REGEXP_OTHER_CONTROL:
 1042:             fprintf(output, "OTHER_CONTROL "); break;
 1043:         case XML_REGEXP_OTHER_FORMAT:
 1044:             fprintf(output, "OTHER_FORMAT "); break;
 1045:         case XML_REGEXP_OTHER_PRIVATE:
 1046:             fprintf(output, "OTHER_PRIVATE "); break;
 1047:         case XML_REGEXP_OTHER_NA:
 1048:             fprintf(output, "OTHER_NA "); break;
 1049:         case XML_REGEXP_BLOCK_NAME:
 1050: 	    fprintf(output, "BLOCK "); break;
 1051:     }
 1052: }
 1053: 
 1054: static void
 1055: xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
 1056:     switch (type) {
 1057:         case XML_REGEXP_QUANT_EPSILON:
 1058: 	    fprintf(output, "epsilon "); break;
 1059:         case XML_REGEXP_QUANT_ONCE:
 1060: 	    fprintf(output, "once "); break;
 1061:         case XML_REGEXP_QUANT_OPT:
 1062: 	    fprintf(output, "? "); break;
 1063:         case XML_REGEXP_QUANT_MULT:
 1064: 	    fprintf(output, "* "); break;
 1065:         case XML_REGEXP_QUANT_PLUS:
 1066: 	    fprintf(output, "+ "); break;
 1067: 	case XML_REGEXP_QUANT_RANGE:
 1068: 	    fprintf(output, "range "); break;
 1069: 	case XML_REGEXP_QUANT_ONCEONLY:
 1070: 	    fprintf(output, "onceonly "); break;
 1071: 	case XML_REGEXP_QUANT_ALL:
 1072: 	    fprintf(output, "all "); break;
 1073:     }
 1074: }
 1075: static void
 1076: xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
 1077:     fprintf(output, "  range: ");
 1078:     if (range->neg)
 1079: 	fprintf(output, "negative ");
 1080:     xmlRegPrintAtomType(output, range->type);
 1081:     fprintf(output, "%c - %c\n", range->start, range->end);
 1082: }
 1083: 
 1084: static void
 1085: xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
 1086:     fprintf(output, " atom: ");
 1087:     if (atom == NULL) {
 1088: 	fprintf(output, "NULL\n");
 1089: 	return;
 1090:     }
 1091:     if (atom->neg)
 1092:         fprintf(output, "not ");
 1093:     xmlRegPrintAtomType(output, atom->type);
 1094:     xmlRegPrintQuantType(output, atom->quant);
 1095:     if (atom->quant == XML_REGEXP_QUANT_RANGE)
 1096: 	fprintf(output, "%d-%d ", atom->min, atom->max);
 1097:     if (atom->type == XML_REGEXP_STRING)
 1098: 	fprintf(output, "'%s' ", (char *) atom->valuep);
 1099:     if (atom->type == XML_REGEXP_CHARVAL)
 1100: 	fprintf(output, "char %c\n", atom->codepoint);
 1101:     else if (atom->type == XML_REGEXP_RANGES) {
 1102: 	int i;
 1103: 	fprintf(output, "%d entries\n", atom->nbRanges);
 1104: 	for (i = 0; i < atom->nbRanges;i++)
 1105: 	    xmlRegPrintRange(output, atom->ranges[i]);
 1106:     } else if (atom->type == XML_REGEXP_SUBREG) {
 1107: 	fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
 1108:     } else {
 1109: 	fprintf(output, "\n");
 1110:     }
 1111: }
 1112: 
 1113: static void
 1114: xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
 1115:     fprintf(output, "  trans: ");
 1116:     if (trans == NULL) {
 1117: 	fprintf(output, "NULL\n");
 1118: 	return;
 1119:     }
 1120:     if (trans->to < 0) {
 1121: 	fprintf(output, "removed\n");
 1122: 	return;
 1123:     }
 1124:     if (trans->nd != 0) {
 1125: 	if (trans->nd == 2)
 1126: 	    fprintf(output, "last not determinist, ");
 1127: 	else
 1128: 	    fprintf(output, "not determinist, ");
 1129:     }
 1130:     if (trans->counter >= 0) {
 1131: 	fprintf(output, "counted %d, ", trans->counter);
 1132:     }
 1133:     if (trans->count == REGEXP_ALL_COUNTER) {
 1134: 	fprintf(output, "all transition, ");
 1135:     } else if (trans->count >= 0) {
 1136: 	fprintf(output, "count based %d, ", trans->count);
 1137:     }
 1138:     if (trans->atom == NULL) {
 1139: 	fprintf(output, "epsilon to %d\n", trans->to);
 1140: 	return;
 1141:     }
 1142:     if (trans->atom->type == XML_REGEXP_CHARVAL)
 1143: 	fprintf(output, "char %c ", trans->atom->codepoint);
 1144:     fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
 1145: }
 1146:     
 1147: static void
 1148: xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
 1149:     int i;
 1150: 
 1151:     fprintf(output, " state: ");
 1152:     if (state == NULL) {
 1153: 	fprintf(output, "NULL\n");
 1154: 	return;
 1155:     }
 1156:     if (state->type == XML_REGEXP_START_STATE)
 1157: 	fprintf(output, "START ");
 1158:     if (state->type == XML_REGEXP_FINAL_STATE)
 1159: 	fprintf(output, "FINAL ");
 1160:     
 1161:     fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
 1162:     for (i = 0;i < state->nbTrans; i++) {
 1163: 	xmlRegPrintTrans(output, &(state->trans[i]));
 1164:     }
 1165: }
 1166: 
 1167: #ifdef DEBUG_REGEXP_GRAPH
 1168: static void
 1169: xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
 1170:     int i;
 1171: 
 1172:     fprintf(output, " ctxt: ");
 1173:     if (ctxt == NULL) {
 1174: 	fprintf(output, "NULL\n");
 1175: 	return;
 1176:     }
 1177:     fprintf(output, "'%s' ", ctxt->string);
 1178:     if (ctxt->error)
 1179: 	fprintf(output, "error ");
 1180:     if (ctxt->neg)
 1181: 	fprintf(output, "neg ");
 1182:     fprintf(output, "\n");
 1183:     fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
 1184:     for (i = 0;i < ctxt->nbAtoms; i++) {
 1185: 	fprintf(output, " %02d ", i);
 1186: 	xmlRegPrintAtom(output, ctxt->atoms[i]);
 1187:     }
 1188:     if (ctxt->atom != NULL) {
 1189: 	fprintf(output, "current atom:\n");
 1190: 	xmlRegPrintAtom(output, ctxt->atom);
 1191:     }
 1192:     fprintf(output, "%d states:", ctxt->nbStates);
 1193:     if (ctxt->start != NULL)
 1194: 	fprintf(output, " start: %d", ctxt->start->no);
 1195:     if (ctxt->end != NULL)
 1196: 	fprintf(output, " end: %d", ctxt->end->no);
 1197:     fprintf(output, "\n");
 1198:     for (i = 0;i < ctxt->nbStates; i++) {
 1199: 	xmlRegPrintState(output, ctxt->states[i]);
 1200:     }
 1201:     fprintf(output, "%d counters:\n", ctxt->nbCounters);
 1202:     for (i = 0;i < ctxt->nbCounters; i++) {
 1203: 	fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
 1204: 		                                ctxt->counters[i].max);
 1205:     }
 1206: }
 1207: #endif
 1208: 
 1209: /************************************************************************
 1210:  * 									*
 1211:  *		 Finite Automata structures manipulations		*
 1212:  * 									*
 1213:  ************************************************************************/
 1214: 
 1215: static void 
 1216: xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
 1217: 	           int neg, xmlRegAtomType type, int start, int end,
 1218: 		   xmlChar *blockName) {
 1219:     xmlRegRangePtr range;
 1220: 
 1221:     if (atom == NULL) {
 1222: 	ERROR("add range: atom is NULL");
 1223: 	return;
 1224:     }
 1225:     if (atom->type != XML_REGEXP_RANGES) {
 1226: 	ERROR("add range: atom is not ranges");
 1227: 	return;
 1228:     }
 1229:     if (atom->maxRanges == 0) {
 1230: 	atom->maxRanges = 4;
 1231: 	atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
 1232: 		                             sizeof(xmlRegRangePtr));
 1233: 	if (atom->ranges == NULL) {
 1234: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1235: 	    atom->maxRanges = 0;
 1236: 	    return;
 1237: 	}
 1238:     } else if (atom->nbRanges >= atom->maxRanges) {
 1239: 	xmlRegRangePtr *tmp;
 1240: 	atom->maxRanges *= 2;
 1241: 	tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
 1242: 		                             sizeof(xmlRegRangePtr));
 1243: 	if (tmp == NULL) {
 1244: 	    xmlRegexpErrMemory(ctxt, "adding ranges");
 1245: 	    atom->maxRanges /= 2;
 1246: 	    return;
 1247: 	}
 1248: 	atom->ranges = tmp;
 1249:     }
 1250:     range = xmlRegNewRange(ctxt, neg, type, start, end);
 1251:     if (range == NULL)
 1252: 	return;
 1253:     range->blockName = blockName;
 1254:     atom->ranges[atom->nbRanges++] = range;
 1255:     
 1256: }
 1257: 
 1258: static int
 1259: xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
 1260:     if (ctxt->maxCounters == 0) {
 1261: 	ctxt->maxCounters = 4;
 1262: 	ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
 1263: 		                             sizeof(xmlRegCounter));
 1264: 	if (ctxt->counters == NULL) {
 1265: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1266: 	    ctxt->maxCounters = 0;
 1267: 	    return(-1);
 1268: 	}
 1269:     } else if (ctxt->nbCounters >= ctxt->maxCounters) {
 1270: 	xmlRegCounter *tmp;
 1271: 	ctxt->maxCounters *= 2;
 1272: 	tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
 1273: 		                           sizeof(xmlRegCounter));
 1274: 	if (tmp == NULL) {
 1275: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1276: 	    ctxt->maxCounters /= 2;
 1277: 	    return(-1);
 1278: 	}
 1279: 	ctxt->counters = tmp;
 1280:     }
 1281:     ctxt->counters[ctxt->nbCounters].min = -1;
 1282:     ctxt->counters[ctxt->nbCounters].max = -1;
 1283:     return(ctxt->nbCounters++);
 1284: }
 1285: 
 1286: static int 
 1287: xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
 1288:     if (atom == NULL) {
 1289: 	ERROR("atom push: atom is NULL");
 1290: 	return(-1);
 1291:     }
 1292:     if (ctxt->maxAtoms == 0) {
 1293: 	ctxt->maxAtoms = 4;
 1294: 	ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
 1295: 		                             sizeof(xmlRegAtomPtr));
 1296: 	if (ctxt->atoms == NULL) {
 1297: 	    xmlRegexpErrMemory(ctxt, "pushing atom");
 1298: 	    ctxt->maxAtoms = 0;
 1299: 	    return(-1);
 1300: 	}
 1301:     } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
 1302: 	xmlRegAtomPtr *tmp;
 1303: 	ctxt->maxAtoms *= 2;
 1304: 	tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
 1305: 		                             sizeof(xmlRegAtomPtr));
 1306: 	if (tmp == NULL) {
 1307: 	    xmlRegexpErrMemory(ctxt, "allocating counter");
 1308: 	    ctxt->maxAtoms /= 2;
 1309: 	    return(-1);
 1310: 	}
 1311: 	ctxt->atoms = tmp;
 1312:     }
 1313:     atom->no = ctxt->nbAtoms;
 1314:     ctxt->atoms[ctxt->nbAtoms++] = atom;
 1315:     return(0);
 1316: }
 1317: 
 1318: static void 
 1319: xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
 1320:                       int from) {
 1321:     if (target->maxTransTo == 0) {
 1322: 	target->maxTransTo = 8;
 1323: 	target->transTo = (int *) xmlMalloc(target->maxTransTo *
 1324: 		                             sizeof(int));
 1325: 	if (target->transTo == NULL) {
 1326: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1327: 	    target->maxTransTo = 0;
 1328: 	    return;
 1329: 	}
 1330:     } else if (target->nbTransTo >= target->maxTransTo) {
 1331: 	int *tmp;
 1332: 	target->maxTransTo *= 2;
 1333: 	tmp = (int *) xmlRealloc(target->transTo, target->maxTransTo *
 1334: 		                             sizeof(int));
 1335: 	if (tmp == NULL) {
 1336: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1337: 	    target->maxTransTo /= 2;
 1338: 	    return;
 1339: 	}
 1340: 	target->transTo = tmp;
 1341:     }
 1342:     target->transTo[target->nbTransTo] = from;
 1343:     target->nbTransTo++;
 1344: }
 1345: 
 1346: static void 
 1347: xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 1348: 	            xmlRegAtomPtr atom, xmlRegStatePtr target,
 1349: 		    int counter, int count) {
 1350: 
 1351:     int nrtrans;
 1352: 
 1353:     if (state == NULL) {
 1354: 	ERROR("add state: state is NULL");
 1355: 	return;
 1356:     }
 1357:     if (target == NULL) {
 1358: 	ERROR("add state: target is NULL");
 1359: 	return;
 1360:     }
 1361:     /*
 1362:      * Other routines follow the philosophy 'When in doubt, add a transition'
 1363:      * so we check here whether such a transition is already present and, if
 1364:      * so, silently ignore this request.
 1365:      */
 1366: 
 1367:     for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
 1368: 	xmlRegTransPtr trans = &(state->trans[nrtrans]);
 1369: 	if ((trans->atom == atom) &&
 1370: 	    (trans->to == target->no) &&
 1371: 	    (trans->counter == counter) &&
 1372: 	    (trans->count == count)) {
 1373: #ifdef DEBUG_REGEXP_GRAPH
 1374: 	    printf("Ignoring duplicate transition from %d to %d\n",
 1375: 		    state->no, target->no);
 1376: #endif
 1377: 	    return;
 1378: 	}
 1379:     }
 1380: 
 1381:     if (state->maxTrans == 0) {
 1382: 	state->maxTrans = 8;
 1383: 	state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
 1384: 		                             sizeof(xmlRegTrans));
 1385: 	if (state->trans == NULL) {
 1386: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1387: 	    state->maxTrans = 0;
 1388: 	    return;
 1389: 	}
 1390:     } else if (state->nbTrans >= state->maxTrans) {
 1391: 	xmlRegTrans *tmp;
 1392: 	state->maxTrans *= 2;
 1393: 	tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
 1394: 		                             sizeof(xmlRegTrans));
 1395: 	if (tmp == NULL) {
 1396: 	    xmlRegexpErrMemory(ctxt, "adding transition");
 1397: 	    state->maxTrans /= 2;
 1398: 	    return;
 1399: 	}
 1400: 	state->trans = tmp;
 1401:     }
 1402: #ifdef DEBUG_REGEXP_GRAPH
 1403:     printf("Add trans from %d to %d ", state->no, target->no);
 1404:     if (count == REGEXP_ALL_COUNTER)
 1405: 	printf("all transition\n");
 1406:     else if (count >= 0)
 1407: 	printf("count based %d\n", count);
 1408:     else if (counter >= 0)
 1409: 	printf("counted %d\n", counter);
 1410:     else if (atom == NULL)
 1411: 	printf("epsilon transition\n");
 1412:     else if (atom != NULL) 
 1413:         xmlRegPrintAtom(stdout, atom);
 1414: #endif
 1415: 
 1416:     state->trans[state->nbTrans].atom = atom;
 1417:     state->trans[state->nbTrans].to = target->no;
 1418:     state->trans[state->nbTrans].counter = counter;
 1419:     state->trans[state->nbTrans].count = count;
 1420:     state->trans[state->nbTrans].nd = 0;
 1421:     state->nbTrans++;
 1422:     xmlRegStateAddTransTo(ctxt, target, state->no);
 1423: }
 1424: 
 1425: static int
 1426: xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
 1427:     if (state == NULL) return(-1);
 1428:     if (ctxt->maxStates == 0) {
 1429: 	ctxt->maxStates = 4;
 1430: 	ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
 1431: 		                             sizeof(xmlRegStatePtr));
 1432: 	if (ctxt->states == NULL) {
 1433: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1434: 	    ctxt->maxStates = 0;
 1435: 	    return(-1);
 1436: 	}
 1437:     } else if (ctxt->nbStates >= ctxt->maxStates) {
 1438: 	xmlRegStatePtr *tmp;
 1439: 	ctxt->maxStates *= 2;
 1440: 	tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states, ctxt->maxStates *
 1441: 		                             sizeof(xmlRegStatePtr));
 1442: 	if (tmp == NULL) {
 1443: 	    xmlRegexpErrMemory(ctxt, "adding state");
 1444: 	    ctxt->maxStates /= 2;
 1445: 	    return(-1);
 1446: 	}
 1447: 	ctxt->states = tmp;
 1448:     }
 1449:     state->no = ctxt->nbStates;
 1450:     ctxt->states[ctxt->nbStates++] = state;
 1451:     return(0);
 1452: }
 1453: 
 1454: /**
 1455:  * xmlFAGenerateAllTransition:
 1456:  * @ctxt:  a regexp parser context
 1457:  * @from:  the from state
 1458:  * @to:  the target state or NULL for building a new one
 1459:  * @lax:
 1460:  *
 1461:  */
 1462: static void
 1463: xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
 1464: 			   xmlRegStatePtr from, xmlRegStatePtr to,
 1465: 			   int lax) {
 1466:     if (to == NULL) {
 1467: 	to = xmlRegNewState(ctxt);
 1468: 	xmlRegStatePush(ctxt, to);
 1469: 	ctxt->state = to;
 1470:     }
 1471:     if (lax)
 1472: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER);
 1473:     else
 1474: 	xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
 1475: }
 1476: 
 1477: /**
 1478:  * xmlFAGenerateEpsilonTransition:
 1479:  * @ctxt:  a regexp parser context
 1480:  * @from:  the from state
 1481:  * @to:  the target state or NULL for building a new one
 1482:  *
 1483:  */
 1484: static void
 1485: xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1486: 			       xmlRegStatePtr from, xmlRegStatePtr to) {
 1487:     if (to == NULL) {
 1488: 	to = xmlRegNewState(ctxt);
 1489: 	xmlRegStatePush(ctxt, to);
 1490: 	ctxt->state = to;
 1491:     }
 1492:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
 1493: }
 1494: 
 1495: /**
 1496:  * xmlFAGenerateCountedEpsilonTransition:
 1497:  * @ctxt:  a regexp parser context
 1498:  * @from:  the from state
 1499:  * @to:  the target state or NULL for building a new one
 1500:  * counter:  the counter for that transition
 1501:  *
 1502:  */
 1503: static void
 1504: xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
 1505: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1506:     if (to == NULL) {
 1507: 	to = xmlRegNewState(ctxt);
 1508: 	xmlRegStatePush(ctxt, to);
 1509: 	ctxt->state = to;
 1510:     }
 1511:     xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
 1512: }
 1513: 
 1514: /**
 1515:  * xmlFAGenerateCountedTransition:
 1516:  * @ctxt:  a regexp parser context
 1517:  * @from:  the from state
 1518:  * @to:  the target state or NULL for building a new one
 1519:  * counter:  the counter for that transition
 1520:  *
 1521:  */
 1522: static void
 1523: xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
 1524: 	    xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
 1525:     if (to == NULL) {
 1526: 	to = xmlRegNewState(ctxt);
 1527: 	xmlRegStatePush(ctxt, to);
 1528: 	ctxt->state = to;
 1529:     }
 1530:     xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
 1531: }
 1532: 
 1533: /**
 1534:  * xmlFAGenerateTransitions:
 1535:  * @ctxt:  a regexp parser context
 1536:  * @from:  the from state
 1537:  * @to:  the target state or NULL for building a new one
 1538:  * @atom:  the atom generating the transition
 1539:  *
 1540:  * Returns 0 if success and -1 in case of error.
 1541:  */
 1542: static int
 1543: xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
 1544: 	                 xmlRegStatePtr to, xmlRegAtomPtr atom) {
 1545:     xmlRegStatePtr end;
 1546: 
 1547:     if (atom == NULL) {
 1548: 	ERROR("genrate transition: atom == NULL");
 1549: 	return(-1);
 1550:     }
 1551:     if (atom->type == XML_REGEXP_SUBREG) {
 1552: 	/*
 1553: 	 * this is a subexpression handling one should not need to
 1554: 	 * create a new node except for XML_REGEXP_QUANT_RANGE.
 1555: 	 */
 1556: 	if (xmlRegAtomPush(ctxt, atom) < 0) {
 1557: 	    return(-1);
 1558: 	}
 1559: 	if ((to != NULL) && (atom->stop != to) &&
 1560: 	    (atom->quant != XML_REGEXP_QUANT_RANGE)) {
 1561: 	    /*
 1562: 	     * Generate an epsilon transition to link to the target
 1563: 	     */
 1564: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1565: #ifdef DV
 1566: 	} else if ((to == NULL) && (atom->quant != XML_REGEXP_QUANT_RANGE) && 
 1567: 		   (atom->quant != XML_REGEXP_QUANT_ONCE)) {
 1568: 	    to = xmlRegNewState(ctxt);
 1569: 	    xmlRegStatePush(ctxt, to);
 1570: 	    ctxt->state = to;
 1571: 	    xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
 1572: #endif
 1573: 	}
 1574: 	switch (atom->quant) {
 1575: 	    case XML_REGEXP_QUANT_OPT:
 1576: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1577: 		/*
 1578: 		 * transition done to the state after end of atom.
 1579: 		 *      1. set transition from atom start to new state
 1580: 		 *      2. set transition from atom end to this state. 
 1581: 		 */
 1582:                 if (to == NULL) {
 1583:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, 0);
 1584:                     xmlFAGenerateEpsilonTransition(ctxt, atom->stop,
 1585:                                                    ctxt->state);
 1586:                 } else {
 1587:                     xmlFAGenerateEpsilonTransition(ctxt, atom->start, to);
 1588:                 }
 1589: 		break;
 1590: 	    case XML_REGEXP_QUANT_MULT:
 1591: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1592: 		xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
 1593: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1594: 		break;
 1595: 	    case XML_REGEXP_QUANT_PLUS:
 1596: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1597: 		xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
 1598: 		break;
 1599: 	    case XML_REGEXP_QUANT_RANGE: {
 1600: 		int counter;
 1601: 		xmlRegStatePtr inter, newstate;
 1602: 
 1603: 		/*
 1604: 		 * create the final state now if needed
 1605: 		 */
 1606: 		if (to != NULL) {
 1607: 		    newstate = to;
 1608: 		} else {
 1609: 		    newstate = xmlRegNewState(ctxt);
 1610: 		    xmlRegStatePush(ctxt, newstate);
 1611: 		}
 1612: 
 1613: 		/*
 1614: 		 * The principle here is to use counted transition
 1615: 		 * to avoid explosion in the number of states in the
 1616: 		 * graph. This is clearly more complex but should not
 1617: 		 * be exploitable at runtime.
 1618: 		 */
 1619: 		if ((atom->min == 0) && (atom->start0 == NULL)) {
 1620: 		    xmlRegAtomPtr copy;
 1621: 		    /*
 1622: 		     * duplicate a transition based on atom to count next
 1623: 		     * occurences after 1. We cannot loop to atom->start
 1624: 		     * directly because we need an epsilon transition to 
 1625: 		     * newstate.
 1626: 		     */
 1627: 		     /* ???? For some reason it seems we never reach that
 1628: 		        case, I suppose this got optimized out before when
 1629: 			building the automata */
 1630: 		    copy = xmlRegCopyAtom(ctxt, atom);
 1631: 		    if (copy == NULL)
 1632: 		        return(-1);
 1633: 		    copy->quant = XML_REGEXP_QUANT_ONCE;
 1634: 		    copy->min = 0;
 1635: 		    copy->max = 0;
 1636: 
 1637: 		    if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
 1638: 		        < 0)
 1639: 			return(-1);
 1640: 		    inter = ctxt->state;
 1641: 		    counter = xmlRegGetCounter(ctxt);
 1642: 		    ctxt->counters[counter].min = atom->min - 1;
 1643: 		    ctxt->counters[counter].max = atom->max - 1;
 1644: 		    /* count the number of times we see it again */
 1645: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
 1646: 						   atom->stop, counter);
 1647: 		    /* allow a way out based on the count */
 1648: 		    xmlFAGenerateCountedTransition(ctxt, inter,
 1649: 			                           newstate, counter);
 1650: 		    /* and also allow a direct exit for 0 */
 1651: 		    xmlFAGenerateEpsilonTransition(ctxt, atom->start,
 1652: 		                                   newstate);
 1653: 		} else {
 1654: 		    /*
 1655: 		     * either we need the atom at least once or there
 1656: 		     * is an atom->start0 allowing to easilly plug the
 1657: 		     * epsilon transition.
 1658: 		     */
 1659: 		    counter = xmlRegGetCounter(ctxt);
 1660: 		    ctxt->counters[counter].min = atom->min - 1;
 1661: 		    ctxt->counters[counter].max = atom->max - 1;
 1662: 		    /* count the number of times we see it again */
 1663: 		    xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
 1664: 						   atom->start, counter);
 1665: 		    /* allow a way out based on the count */
 1666: 		    xmlFAGenerateCountedTransition(ctxt, atom->stop,
 1667: 			                           newstate, counter);
 1668: 		    /* and if needed allow a direct exit for 0 */
 1669: 		    if (atom->min == 0)
 1670: 			xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
 1671: 						       newstate);
 1672: 
 1673: 		}
 1674: 		atom->min = 0;
 1675: 		atom->max = 0;
 1676: 		atom->quant = XML_REGEXP_QUANT_ONCE;
 1677: 		ctxt->state = newstate;
 1678: 	    }
 1679: 	    default:
 1680: 		break;
 1681: 	}
 1682: 	return(0);
 1683:     } 
 1684:     if ((atom->min == 0) && (atom->max == 0) &&
 1685:                (atom->quant == XML_REGEXP_QUANT_RANGE)) {
 1686:         /*
 1687: 	 * we can discard the atom and generate an epsilon transition instead
 1688: 	 */
 1689: 	if (to == NULL) {
 1690: 	    to = xmlRegNewState(ctxt);
 1691: 	    if (to != NULL)
 1692: 		xmlRegStatePush(ctxt, to);
 1693: 	    else {
 1694: 		return(-1);
 1695: 	    }
 1696: 	}
 1697: 	xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1698: 	ctxt->state = to;
 1699: 	xmlRegFreeAtom(atom);
 1700: 	return(0);
 1701:     }
 1702:     if (to == NULL) {
 1703: 	to = xmlRegNewState(ctxt);
 1704: 	if (to != NULL)
 1705: 	    xmlRegStatePush(ctxt, to);
 1706: 	else {
 1707: 	    return(-1);
 1708: 	}
 1709:     } 
 1710:     end = to;
 1711:     if ((atom->quant == XML_REGEXP_QUANT_MULT) || 
 1712:         (atom->quant == XML_REGEXP_QUANT_PLUS)) {
 1713: 	/*
 1714: 	 * Do not pollute the target state by adding transitions from
 1715: 	 * it as it is likely to be the shared target of multiple branches.
 1716: 	 * So isolate with an epsilon transition.
 1717: 	 */
 1718:         xmlRegStatePtr tmp;
 1719: 	
 1720: 	tmp = xmlRegNewState(ctxt);
 1721: 	if (tmp != NULL)
 1722: 	    xmlRegStatePush(ctxt, tmp);
 1723: 	else {
 1724: 	    return(-1);
 1725: 	}
 1726: 	xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
 1727: 	to = tmp;
 1728:     }
 1729:     if (xmlRegAtomPush(ctxt, atom) < 0) {
 1730: 	return(-1);
 1731:     }
 1732:     xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
 1733:     ctxt->state = end;
 1734:     switch (atom->quant) {
 1735: 	case XML_REGEXP_QUANT_OPT:
 1736: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1737: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1738: 	    break;
 1739: 	case XML_REGEXP_QUANT_MULT:
 1740: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1741: 	    xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1742: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1743: 	    break;
 1744: 	case XML_REGEXP_QUANT_PLUS:
 1745: 	    atom->quant = XML_REGEXP_QUANT_ONCE;
 1746: 	    xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
 1747: 	    break;
 1748: 	case XML_REGEXP_QUANT_RANGE: 
 1749: #if DV_test
 1750: 	    if (atom->min == 0) {
 1751: 		xmlFAGenerateEpsilonTransition(ctxt, from, to);
 1752: 	    }
 1753: #endif
 1754: 	    break;
 1755: 	default:
 1756: 	    break;
 1757:     }
 1758:     return(0);
 1759: }
 1760: 
 1761: /**
 1762:  * xmlFAReduceEpsilonTransitions:
 1763:  * @ctxt:  a regexp parser context
 1764:  * @fromnr:  the from state
 1765:  * @tonr:  the to state 
 1766:  * @counter:  should that transition be associated to a counted
 1767:  *
 1768:  */
 1769: static void
 1770: xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
 1771: 	                      int tonr, int counter) {
 1772:     int transnr;
 1773:     xmlRegStatePtr from;
 1774:     xmlRegStatePtr to;
 1775: 
 1776: #ifdef DEBUG_REGEXP_GRAPH
 1777:     printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
 1778: #endif
 1779:     from = ctxt->states[fromnr];
 1780:     if (from == NULL)
 1781: 	return;
 1782:     to = ctxt->states[tonr];
 1783:     if (to == NULL)
 1784: 	return;
 1785:     if ((to->mark == XML_REGEXP_MARK_START) ||
 1786: 	(to->mark == XML_REGEXP_MARK_VISITED))
 1787: 	return;
 1788: 
 1789:     to->mark = XML_REGEXP_MARK_VISITED;
 1790:     if (to->type == XML_REGEXP_FINAL_STATE) {
 1791: #ifdef DEBUG_REGEXP_GRAPH
 1792: 	printf("State %d is final, so %d becomes final\n", tonr, fromnr);
 1793: #endif
 1794: 	from->type = XML_REGEXP_FINAL_STATE;
 1795:     }
 1796:     for (transnr = 0;transnr < to->nbTrans;transnr++) {
 1797:         if (to->trans[transnr].to < 0)
 1798: 	    continue;
 1799: 	if (to->trans[transnr].atom == NULL) {
 1800: 	    /*
 1801: 	     * Don't remove counted transitions
 1802: 	     * Don't loop either
 1803: 	     */
 1804: 	    if (to->trans[transnr].to != fromnr) {
 1805: 		if (to->trans[transnr].count >= 0) {
 1806: 		    int newto = to->trans[transnr].to;
 1807: 
 1808: 		    xmlRegStateAddTrans(ctxt, from, NULL,
 1809: 					ctxt->states[newto], 
 1810: 					-1, to->trans[transnr].count);
 1811: 		} else {
 1812: #ifdef DEBUG_REGEXP_GRAPH
 1813: 		    printf("Found epsilon trans %d from %d to %d\n",
 1814: 			   transnr, tonr, to->trans[transnr].to);
 1815: #endif
 1816: 		    if (to->trans[transnr].counter >= 0) {
 1817: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1818: 					      to->trans[transnr].to,
 1819: 					      to->trans[transnr].counter);
 1820: 		    } else {
 1821: 			xmlFAReduceEpsilonTransitions(ctxt, fromnr,
 1822: 					      to->trans[transnr].to,
 1823: 					      counter);
 1824: 		    }
 1825: 		}
 1826: 	    }
 1827: 	} else {
 1828: 	    int newto = to->trans[transnr].to;
 1829: 
 1830: 	    if (to->trans[transnr].counter >= 0) {
 1831: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
 1832: 				    ctxt->states[newto], 
 1833: 				    to->trans[transnr].counter, -1);
 1834: 	    } else {
 1835: 		xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, 
 1836: 				    ctxt->states[newto], counter, -1);
 1837: 	    }
 1838: 	}
 1839:     }
 1840:     to->mark = XML_REGEXP_MARK_NORMAL;
 1841: }
 1842: 
 1843: /**
 1844:  * xmlFAEliminateSimpleEpsilonTransitions:
 1845:  * @ctxt:  a regexp parser context
 1846:  *
 1847:  * Eliminating general epsilon transitions can get costly in the general 
 1848:  * algorithm due to the large amount of generated new transitions and
 1849:  * associated comparisons. However for simple epsilon transition used just
 1850:  * to separate building blocks when generating the automata this can be
 1851:  * reduced to state elimination:
 1852:  *    - if there exists an epsilon from X to Y
 1853:  *    - if there is no other transition from X
 1854:  * then X and Y are semantically equivalent and X can be eliminated
 1855:  * If X is the start state then make Y the start state, else replace the
 1856:  * target of all transitions to X by transitions to Y.
 1857:  */
 1858: static void
 1859: xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
 1860:     int statenr, i, j, newto;
 1861:     xmlRegStatePtr state, tmp;
 1862: 
 1863:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 1864: 	state = ctxt->states[statenr];
 1865: 	if (state == NULL)
 1866: 	    continue;
 1867: 	if (state->nbTrans != 1)
 1868: 	    continue;
 1869: 	if (state->type == XML_REGEXP_UNREACH_STATE)
 1870: 	    continue;
 1871: 	/* is the only transition out a basic transition */
 1872: 	if ((state->trans[0].atom == NULL) &&
 1873: 	    (state->trans[0].to >= 0) &&
 1874: 	    (state->trans[0].to != statenr) &&
 1875: 	    (state->trans[0].counter < 0) &&
 1876: 	    (state->trans[0].count < 0)) {
 1877: 	    newto = state->trans[0].to;
 1878: 
 1879:             if (state->type == XML_REGEXP_START_STATE) {
 1880: #ifdef DEBUG_REGEXP_GRAPH
 1881: 		printf("Found simple epsilon trans from start %d to %d\n",
 1882: 		       statenr, newto);
 1883: #endif     
 1884:             } else {
 1885: #ifdef DEBUG_REGEXP_GRAPH
 1886: 		printf("Found simple epsilon trans from %d to %d\n",
 1887: 		       statenr, newto);
 1888: #endif     
 1889: 	        for (i = 0;i < state->nbTransTo;i++) {
 1890: 		    tmp = ctxt->states[state->transTo[i]];
 1891: 		    for (j = 0;j < tmp->nbTrans;j++) {
 1892: 			if (tmp->trans[j].to == statenr) {
 1893: #ifdef DEBUG_REGEXP_GRAPH
 1894: 			    printf("Changed transition %d on %d to go to %d\n",
 1895: 				   j, tmp->no, newto);
 1896: #endif     
 1897: 			    tmp->trans[j].to = -1;
 1898: 			    xmlRegStateAddTrans(ctxt, tmp, tmp->trans[j].atom,
 1899: 			    			ctxt->states[newto],
 1900: 					        tmp->trans[j].counter,
 1901: 						tmp->trans[j].count);
 1902: 			}
 1903: 		    }
 1904: 		}
 1905: 		if (state->type == XML_REGEXP_FINAL_STATE)
 1906: 		    ctxt->states[newto]->type = XML_REGEXP_FINAL_STATE;
 1907: 		/* eliminate the transition completely */
 1908: 		state->nbTrans = 0;
 1909: 
 1910:                 state->type = XML_REGEXP_UNREACH_STATE;
 1911: 
 1912: 	    }
 1913:             
 1914: 	}
 1915:     }
 1916: }
/**
 * xmlFAEliminateEpsilonTransitions:
 * @ctxt:  a regexp parser context
 *
 * Removes the epsilon transitions from the automata held by @ctxt.
 * The work is done in four successive passes:
 *    1. xmlFAEliminateSimpleEpsilonTransitions() is run and the states
 *       it flagged XML_REGEXP_UNREACH_STATE are freed
 *    2. walking the states from last to first, states without any
 *       transition which are not final become sink states, epsilon
 *       self-loops are cancelled, and each uncounted epsilon is expanded
 *       through xmlFAReduceEpsilonTransitions()
 *    3. if at least one epsilon was expanded, the remaining atom-less,
 *       uncounted transitions with a valid target are cancelled
 *    4. the states reachable from state 0 are marked and all the others
 *       are freed
 * Does nothing if @ctxt has no state array.
 */
static void
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
    int statenr, transnr;
    xmlRegStatePtr state;
    int has_epsilon;

    if (ctxt->states == NULL) return;

    /*
     * Eliminate simple epsilon transition and the associated unreachable
     * states.
     */
    xmlFAEliminateSimpleEpsilonTransitions(ctxt);
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
	state = ctxt->states[statenr];
	if ((state != NULL) && (state->type == XML_REGEXP_UNREACH_STATE)) {
#ifdef DEBUG_REGEXP_GRAPH
	    printf("Removed unreachable state %d\n", statenr);
#endif
	    xmlRegFreeState(state);
	    ctxt->states[statenr] = NULL;
	}
    }

    has_epsilon = 0;

    /*
     * Build the completed transitions bypassing the epsilons
     * Use a marking algorithm to avoid loops
     * Mark sink states too.
     * Process from the latest states backward to the start: when
     * there are long cascading epsilon chains this minimizes the
     * recursions and transition compares when adding the new ones
     */
    for (statenr = ctxt->nbStates - 1;statenr >= 0;statenr--) {
	state = ctxt->states[statenr];
	if (state == NULL)
	    continue;
	/* a non final state with no way out is a sink */
	if ((state->nbTrans == 0) &&
	    (state->type != XML_REGEXP_FINAL_STATE)) {
	    state->type = XML_REGEXP_SINK_STATE;
	}
	/*
	 * state->nbTrans and state->trans are read again on every
	 * iteration.
	 * NOTE(review): presumably because the reduction below appends
	 * transitions to this very state - confirm against
	 * xmlFAReduceEpsilonTransitions().
	 */
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
	    if ((state->trans[transnr].atom == NULL) &&
		(state->trans[transnr].to >= 0)) {
		if (state->trans[transnr].to == statenr) {
		    /* an epsilon looping on its own state is just cancelled */
		    state->trans[transnr].to = -1;
#ifdef DEBUG_REGEXP_GRAPH
		    printf("Removed loopback epsilon trans %d on %d\n",
			   transnr, statenr);
#endif
		} else if (state->trans[transnr].count < 0) {
		    int newto = state->trans[transnr].to;

#ifdef DEBUG_REGEXP_GRAPH
		    printf("Found epsilon trans %d from %d to %d\n",
			   transnr, statenr, newto);
#endif
		    has_epsilon = 1;
		    /*
		     * the target is overwritten with -2 before the
		     * expansion; the cleanup pass below only touches
		     * targets >= 0 so this value is left in place.
		     */
		    state->trans[transnr].to = -2;
		    state->mark = XML_REGEXP_MARK_START;
		    xmlFAReduceEpsilonTransitions(ctxt, statenr,
				      newto, state->trans[transnr].counter);
		    state->mark = XML_REGEXP_MARK_NORMAL;
#ifdef DEBUG_REGEXP_GRAPH
		} else {
		    printf("Found counted transition %d on %d\n",
			   transnr, statenr);
#endif
	        }
	    }
	}
    }
    /*
     * Eliminate the epsilon transitions: any atom-less, uncounted
     * transition still pointing to a state is cancelled.
     */
    if (has_epsilon) {
	for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
	    state = ctxt->states[statenr];
	    if (state == NULL)
		continue;
	    for (transnr = 0;transnr < state->nbTrans;transnr++) {
		xmlRegTransPtr trans = &(state->trans[transnr]);
		if ((trans->atom == NULL) &&
		    (trans->count < 0) &&
		    (trans->to >= 0)) {
		    trans->to = -1;
		}
	    }
	}
    }

    /*
     * Use this pass to detect unreachable states too
     */
    /* start with every state flagged as not reached */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
	state = ctxt->states[statenr];
	if (state != NULL)
	    state->reached = XML_REGEXP_MARK_NORMAL;
    }
    /* the exploration is seeded with the state stored at index 0 */
    state = ctxt->states[0];
    if (state != NULL)
	state->reached = XML_REGEXP_MARK_START;
    /*
     * MARK_START means discovered but not explored yet,
     * MARK_VISITED means explored.
     */
    while (state != NULL) {
	xmlRegStatePtr target = NULL;
	state->reached = XML_REGEXP_MARK_VISITED;
	/*
	 * Mark all states reachable from the current reachable state,
	 * only transitions carrying an atom or a count are followed.
	 */
	for (transnr = 0;transnr < state->nbTrans;transnr++) {
	    if ((state->trans[transnr].to >= 0) &&
		((state->trans[transnr].atom != NULL) ||
		 (state->trans[transnr].count >= 0))) {
		int newto = state->trans[transnr].to;

		if (ctxt->states[newto] == NULL)
		    continue;
		if (ctxt->states[newto]->reached == XML_REGEXP_MARK_NORMAL) {
		    ctxt->states[newto]->reached = XML_REGEXP_MARK_START;
		    /* the last newly discovered state is explored next */
		    target = ctxt->states[newto];
		}
	    }
	}

	/*
	 * find the next accessible state not explored, the loop stops
	 * when none is left since target stays NULL.
	 */
	if (target == NULL) {
	    for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
		state = ctxt->states[statenr];
		if ((state != NULL) && (state->reached ==
			XML_REGEXP_MARK_START)) {
		    target = state;
		    break;
		}
	    }
	}
	state = target;
    }
    /* whatever was never discovered cannot be reached: free it */
    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
	state = ctxt->states[statenr];
	if ((state != NULL) && (state->reached == XML_REGEXP_MARK_NORMAL)) {
#ifdef DEBUG_REGEXP_GRAPH
	    printf("Removed unreachable state %d\n", statenr);
#endif
	    xmlRegFreeState(state);
	    ctxt->states[statenr] = NULL;
	}
    }

}
 2073: 
 2074: static int
 2075: xmlFACompareRanges(xmlRegRangePtr range1, xmlRegRangePtr range2) {
 2076:     int ret = 0;
 2077: 
 2078:     if ((range1->type == XML_REGEXP_RANGES) ||
 2079:         (range2->type == XML_REGEXP_RANGES) ||
 2080:         (range2->type == XML_REGEXP_SUBREG) ||
 2081:         (range1->type == XML_REGEXP_SUBREG) ||
 2082:         (range1->type == XML_REGEXP_STRING) ||
 2083:         (range2->type == XML_REGEXP_STRING))
 2084: 	return(-1);
 2085: 
 2086:     /* put them in order */
 2087:     if (range1->type > range2->type) {
 2088:         xmlRegRangePtr tmp;
 2089: 
 2090: 	tmp = range1;
 2091: 	range1 = range2;
 2092: 	range2 = tmp;
 2093:     }
 2094:     if ((range1->type == XML_REGEXP_ANYCHAR) ||
 2095:         (range2->type == XML_REGEXP_ANYCHAR)) {
 2096: 	ret = 1;
 2097:     } else if ((range1->type == XML_REGEXP_EPSILON) ||
 2098:                (range2->type == XML_REGEXP_EPSILON)) {
 2099: 	return(0);
 2100:     } else if (range1->type == range2->type) {
 2101:         if (range1->type != XML_REGEXP_CHARVAL)
 2102:             ret = 1;
 2103:         else if ((range1->end < range2->start) ||
 2104: 	         (range2->end < range1->start))
 2105: 	    ret = 0;
 2106: 	else
 2107: 	    ret = 1;
 2108:     } else if (range1->type == XML_REGEXP_CHARVAL) {
 2109:         int codepoint;
 2110: 	int neg = 0;
 2111: 
 2112: 	/*
 2113: 	 * just check all codepoints in the range for acceptance,
 2114: 	 * this is usually way cheaper since done only once at
 2115: 	 * compilation than testing over and over at runtime or 
 2116: 	 * pushing too many states when evaluating.
 2117: 	 */
 2118: 	if (((range1->neg == 0) && (range2->neg != 0)) ||
 2119: 	    ((range1->neg != 0) && (range2->neg == 0)))
 2120: 	    neg = 1;
 2121: 
 2122: 	for (codepoint = range1->start;codepoint <= range1->end ;codepoint++) {
 2123: 	    ret = xmlRegCheckCharacterRange(range2->type, codepoint,
 2124: 					    0, range2->start, range2->end,
 2125: 					    range2->blockName);
 2126: 	    if (ret < 0)
 2127: 	        return(-1);
 2128: 	    if (((neg == 1) && (ret == 0)) ||
 2129: 	        ((neg == 0) && (ret == 1)))
 2130: 		return(1);
 2131: 	}
 2132: 	return(0);
 2133:     } else if ((range1->type == XML_REGEXP_BLOCK_NAME) ||
 2134:                (range2->type == XML_REGEXP_BLOCK_NAME)) {
 2135: 	if (range1->type == range2->type) {
 2136: 	    ret = xmlStrEqual(range1->blockName, range2->blockName);
 2137: 	} else {
 2138: 	    /*
 2139: 	     * comparing a block range with anything else is way
 2140: 	     * too costly, and maintining the table is like too much
 2141: 	     * memory too, so let's force the automata to save state
 2142: 	     * here.
 2143: 	     */
 2144: 	    return(1);
 2145: 	}
 2146:     } else if ((range1->type < XML_REGEXP_LETTER) ||
 2147:                (range2->type < XML_REGEXP_LETTER)) {
 2148: 	if ((range1->type == XML_REGEXP_ANYSPACE) &&
 2149: 	    (range2->type == XML_REGEXP_NOTSPACE))
 2150: 	    ret = 0;
 2151: 	else if ((range1->type == XML_REGEXP_INITNAME) &&
 2152: 	         (range2->type == XML_REGEXP_NOTINITNAME))
 2153: 	    ret = 0;
 2154: 	else if ((range1->type == XML_REGEXP_NAMECHAR) &&
 2155: 	         (range2->type == XML_REGEXP_NOTNAMECHAR))
 2156: 	    ret = 0;
 2157: 	else if ((range1->type == XML_REGEXP_DECIMAL) &&
 2158: 	         (range2->type == XML_REGEXP_NOTDECIMAL))
 2159: 	    ret = 0;
 2160: 	else if ((range1->type == XML_REGEXP_REALCHAR) &&
 2161: 	         (range2->type == XML_REGEXP_NOTREALCHAR))
 2162: 	    ret = 0;
 2163: 	else {
 2164: 	    /* same thing to limit complexity */
 2165: 	    return(1);
 2166: 	}
 2167:     } else {
 2168:         ret = 0;
 2169:         /* range1->type < range2->type here */
 2170:         switch (range1->type) {
 2171: 	    case XML_REGEXP_LETTER:
 2172: 	         /* all disjoint except in the subgroups */
 2173: 	         if ((range2->type == XML_REGEXP_LETTER_UPPERCASE) ||
 2174: 		     (range2->type == XML_REGEXP_LETTER_LOWERCASE) ||
 2175: 		     (range2->type == XML_REGEXP_LETTER_TITLECASE) ||
 2176: 		     (range2->type == XML_REGEXP_LETTER_MODIFIER) ||
 2177: 		     (range2->type == XML_REGEXP_LETTER_OTHERS))
 2178: 		     ret = 1;
 2179: 		 break;
 2180: 	    case XML_REGEXP_MARK:
 2181: 	         if ((range2->type == XML_REGEXP_MARK_NONSPACING) ||
 2182: 		     (range2->type == XML_REGEXP_MARK_SPACECOMBINING) ||
 2183: 		     (range2->type == XML_REGEXP_MARK_ENCLOSING))
 2184: 		     ret = 1;
 2185: 		 break;
 2186: 	    case XML_REGEXP_NUMBER:
 2187: 	         if ((range2->type == XML_REGEXP_NUMBER_DECIMAL) ||
 2188: 		     (range2->type == XML_REGEXP_NUMBER_LETTER) ||
 2189: 		     (range2->type == XML_REGEXP_NUMBER_OTHERS))
 2190: 		     ret = 1;
 2191: 		 break;
 2192: 	    case XML_REGEXP_PUNCT:
 2193: 	         if ((range2->type == XML_REGEXP_PUNCT_CONNECTOR) ||
 2194: 		     (range2->type == XML_REGEXP_PUNCT_DASH) ||
 2195: 		     (range2->type == XML_REGEXP_PUNCT_OPEN) ||
 2196: 		     (range2->type == XML_REGEXP_PUNCT_CLOSE) ||
 2197: 		     (range2->type == XML_REGEXP_PUNCT_INITQUOTE) ||
 2198: 		     (range2->type == XML_REGEXP_PUNCT_FINQUOTE) ||
 2199: 		     (range2->type == XML_REGEXP_PUNCT_OTHERS))
 2200: 		     ret = 1;
 2201: 		 break;
 2202: 	    case XML_REGEXP_SEPAR:
 2203: 	         if ((range2->type == XML_REGEXP_SEPAR_SPACE) ||
 2204: 		     (range2->type == XML_REGEXP_SEPAR_LINE) ||
 2205: 		     (range2->type == XML_REGEXP_SEPAR_PARA))
 2206: 		     ret = 1;
 2207: 		 break;
 2208: 	    case XML_REGEXP_SYMBOL:
 2209: 	         if ((range2->type == XML_REGEXP_SYMBOL_MATH) ||
 2210: 		     (range2->type == XML_REGEXP_SYMBOL_CURRENCY) ||
 2211: 		     (range2->type == XML_REGEXP_SYMBOL_MODIFIER) ||
 2212: 		     (range2->type == XML_REGEXP_SYMBOL_OTHERS))
 2213: 		     ret = 1;
 2214: 		 break;
 2215: 	    case XML_REGEXP_OTHER:
 2216: 	         if ((range2->type == XML_REGEXP_OTHER_CONTROL) ||
 2217: 		     (range2->type == XML_REGEXP_OTHER_FORMAT) ||
 2218: 		     (range2->type == XML_REGEXP_OTHER_PRIVATE))
 2219: 		     ret = 1;
 2220: 		 break;
 2221:             default:
 2222: 	         if ((range2->type >= XML_REGEXP_LETTER) &&
 2223: 		     (range2->type < XML_REGEXP_BLOCK_NAME))
 2224: 		     ret = 0;
 2225: 		 else {
 2226: 		     /* safety net ! */
 2227: 		     return(1);
 2228: 		 }
 2229: 	}
 2230:     }
 2231:     if (((range1->neg == 0) && (range2->neg != 0)) ||
 2232:         ((range1->neg != 0) && (range2->neg == 0)))
 2233: 	ret = !ret;
 2234:     return(ret);
 2235: }
 2236: 
 2237: /**
 2238:  * xmlFACompareAtomTypes:
 2239:  * @type1:  an atom type
 2240:  * @type2:  an atom type
 2241:  *
 2242:  * Compares two atoms type to check whether they intersect in some ways,
 2243:  * this is used by xmlFACompareAtoms only
 2244:  *
 2245:  * Returns 1 if they may intersect and 0 otherwise
 2246:  */
 2247: static int
 2248: xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
 2249:     if ((type1 == XML_REGEXP_EPSILON) ||
 2250:         (type1 == XML_REGEXP_CHARVAL) ||
 2251: 	(type1 == XML_REGEXP_RANGES) ||
 2252: 	(type1 == XML_REGEXP_SUBREG) ||
 2253: 	(type1 == XML_REGEXP_STRING) ||
 2254: 	(type1 == XML_REGEXP_ANYCHAR))
 2255: 	return(1);
 2256:     if ((type2 == XML_REGEXP_EPSILON) ||
 2257:         (type2 == XML_REGEXP_CHARVAL) ||
 2258: 	(type2 == XML_REGEXP_RANGES) ||
 2259: 	(type2 == XML_REGEXP_SUBREG) ||
 2260: 	(type2 == XML_REGEXP_STRING) ||
 2261: 	(type2 == XML_REGEXP_ANYCHAR))
 2262: 	return(1);
 2263: 
 2264:     if (type1 == type2) return(1);
 2265: 
 2266:     /* simplify subsequent compares by making sure type1 < type2 */
 2267:     if (type1 > type2) {
 2268:         xmlRegAtomType tmp = type1;
 2269: 	type1 = type2;
 2270: 	type2 = tmp;
 2271:     }
 2272:     switch (type1) {
 2273:         case XML_REGEXP_ANYSPACE: /* \s */
 2274: 	    /* can't be a letter, number, mark, pontuation, symbol */
 2275: 	    if ((type2 == XML_REGEXP_NOTSPACE) ||
 2276: 		((type2 >= XML_REGEXP_LETTER) &&
 2277: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2278: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2279: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2280: 	        ((type2 >= XML_REGEXP_MARK) &&
 2281: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2282: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2283: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2284: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2285: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS))
 2286: 	        ) return(0);
 2287: 	    break;
 2288:         case XML_REGEXP_NOTSPACE: /* \S */
 2289: 	    break;
 2290:         case XML_REGEXP_INITNAME: /* \l */
 2291: 	    /* can't be a number, mark, separator, pontuation, symbol or other */
 2292: 	    if ((type2 == XML_REGEXP_NOTINITNAME) ||
 2293: 	        ((type2 >= XML_REGEXP_NUMBER) &&
 2294: 		 (type2 <= XML_REGEXP_NUMBER_OTHERS)) ||
 2295: 	        ((type2 >= XML_REGEXP_MARK) &&
 2296: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2297: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2298: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2299: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2300: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2301: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2302: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2303: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2304: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2305: 		) return(0);
 2306: 	    break;
 2307:         case XML_REGEXP_NOTINITNAME: /* \L */
 2308: 	    break;
 2309:         case XML_REGEXP_NAMECHAR: /* \c */
 2310: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2311: 	    if ((type2 == XML_REGEXP_NOTNAMECHAR) ||
 2312: 	        ((type2 >= XML_REGEXP_MARK) &&
 2313: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2314: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2315: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2316: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2317: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2318: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2319: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2320: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2321: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2322: 		) return(0);
 2323: 	    break;
 2324:         case XML_REGEXP_NOTNAMECHAR: /* \C */
 2325: 	    break;
 2326:         case XML_REGEXP_DECIMAL: /* \d */
 2327: 	    /* can't be a letter, mark, separator, pontuation, symbol or other */
 2328: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2329: 	        (type2 == XML_REGEXP_REALCHAR) ||
 2330: 		((type2 >= XML_REGEXP_LETTER) &&
 2331: 		 (type2 <= XML_REGEXP_LETTER_OTHERS)) ||
 2332: 	        ((type2 >= XML_REGEXP_MARK) &&
 2333: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2334: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2335: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2336: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2337: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2338: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2339: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2340: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2341: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2342: 		)return(0);
 2343: 	    break;
 2344:         case XML_REGEXP_NOTDECIMAL: /* \D */
 2345: 	    break;
 2346:         case XML_REGEXP_REALCHAR: /* \w */
 2347: 	    /* can't be a mark, separator, pontuation, symbol or other */
 2348: 	    if ((type2 == XML_REGEXP_NOTDECIMAL) ||
 2349: 	        ((type2 >= XML_REGEXP_MARK) &&
 2350: 		 (type2 <= XML_REGEXP_MARK_ENCLOSING)) ||
 2351: 	        ((type2 >= XML_REGEXP_PUNCT) &&
 2352: 		 (type2 <= XML_REGEXP_PUNCT_OTHERS)) ||
 2353: 	        ((type2 >= XML_REGEXP_SEPAR) &&
 2354: 		 (type2 <= XML_REGEXP_SEPAR_PARA)) ||
 2355: 	        ((type2 >= XML_REGEXP_SYMBOL) &&
 2356: 		 (type2 <= XML_REGEXP_SYMBOL_OTHERS)) ||
 2357: 	        ((type2 >= XML_REGEXP_OTHER) &&
 2358: 		 (type2 <= XML_REGEXP_OTHER_NA))
 2359: 		)return(0);
 2360: 	    break;
 2361:         case XML_REGEXP_NOTREALCHAR: /* \W */
 2362: 	    break;
 2363: 	/*
 2364: 	 * at that point we know both type 1 and type2 are from
 2365: 	 * character categories are ordered and are different,
 2366: 	 * it becomes simple because this is a partition
 2367: 	 */
 2368:         case XML_REGEXP_LETTER:
 2369: 	    if (type2 <= XML_REGEXP_LETTER_OTHERS)
 2370: 	        return(1);
 2371: 	    return(0);
 2372:         case XML_REGEXP_LETTER_UPPERCASE:
 2373:         case XML_REGEXP_LETTER_LOWERCASE:
 2374:         case XML_REGEXP_LETTER_TITLECASE:
 2375:         case XML_REGEXP_LETTER_MODIFIER:
 2376:         case XML_REGEXP_LETTER_OTHERS:
 2377: 	    return(0);
 2378:         case XML_REGEXP_MARK:
 2379: 	    if (type2 <= XML_REGEXP_MARK_ENCLOSING)
 2380: 	        return(1);
 2381: 	    return(0);
 2382:         case XML_REGEXP_MARK_NONSPACING:
 2383:         case XML_REGEXP_MARK_SPACECOMBINING:
 2384:         case XML_REGEXP_MARK_ENCLOSING:
 2385: 	    return(0);
 2386:         case XML_REGEXP_NUMBER:
 2387: 	    if (type2 <= XML_REGEXP_NUMBER_OTHERS)
 2388: 	        return(1);
 2389: 	    return(0);
 2390:         case XML_REGEXP_NUMBER_DECIMAL:
 2391:         case XML_REGEXP_NUMBER_LETTER:
 2392:         case XML_REGEXP_NUMBER_OTHERS:
 2393: 	    return(0);
 2394:         case XML_REGEXP_PUNCT:
 2395: 	    if (type2 <= XML_REGEXP_PUNCT_OTHERS)
 2396: 	        return(1);
 2397: 	    return(0);
 2398:         case XML_REGEXP_PUNCT_CONNECTOR:
 2399:         case XML_REGEXP_PUNCT_DASH:
 2400:         case XML_REGEXP_PUNCT_OPEN:
 2401:         case XML_REGEXP_PUNCT_CLOSE:
 2402:         case XML_REGEXP_PUNCT_INITQUOTE:
 2403:         case XML_REGEXP_PUNCT_FINQUOTE:
 2404:         case XML_REGEXP_PUNCT_OTHERS:
 2405: 	    return(0);
 2406:         case XML_REGEXP_SEPAR:
 2407: 	    if (type2 <= XML_REGEXP_SEPAR_PARA)
 2408: 	        return(1);
 2409: 	    return(0);
 2410:         case XML_REGEXP_SEPAR_SPACE:
 2411:         case XML_REGEXP_SEPAR_LINE:
 2412:         case XML_REGEXP_SEPAR_PARA:
 2413: 	    return(0);
 2414:         case XML_REGEXP_SYMBOL:
 2415: 	    if (type2 <= XML_REGEXP_SYMBOL_OTHERS)
 2416: 	        return(1);
 2417: 	    return(0);
 2418:         case XML_REGEXP_SYMBOL_MATH:
 2419:         case XML_REGEXP_SYMBOL_CURRENCY:
 2420:         case XML_REGEXP_SYMBOL_MODIFIER:
 2421:         case XML_REGEXP_SYMBOL_OTHERS:
 2422: 	    return(0);
 2423:         case XML_REGEXP_OTHER:
 2424: 	    if (type2 <= XML_REGEXP_OTHER_NA)
 2425: 	        return(1);
 2426: 	    return(0);
 2427:         case XML_REGEXP_OTHER_CONTROL:
 2428:         case XML_REGEXP_OTHER_FORMAT:
 2429:         case XML_REGEXP_OTHER_PRIVATE:
 2430:         case XML_REGEXP_OTHER_NA:
 2431: 	    return(0);
 2432: 	default:
 2433: 	    break;
 2434:     }
 2435:     return(1);
 2436: }
 2437: 
 2438: /**
 2439:  * xmlFAEqualAtoms:
 2440:  * @atom1:  an atom
 2441:  * @atom2:  an atom
 2442:  * @deep: if not set only compare string pointers
 2443:  *
 2444:  * Compares two atoms to check whether they are the same exactly
 2445:  * this is used to remove equivalent transitions
 2446:  *
 2447:  * Returns 1 if same and 0 otherwise
 2448:  */
 2449: static int
 2450: xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2451:     int ret = 0;
 2452: 
 2453:     if (atom1 == atom2)
 2454: 	return(1);
 2455:     if ((atom1 == NULL) || (atom2 == NULL))
 2456: 	return(0);
 2457: 
 2458:     if (atom1->type != atom2->type)
 2459:         return(0);
 2460:     switch (atom1->type) {
 2461:         case XML_REGEXP_EPSILON:
 2462: 	    ret = 0;
 2463: 	    break;
 2464:         case XML_REGEXP_STRING:
 2465:             if (!deep)
 2466:                 ret = (atom1->valuep == atom2->valuep);
 2467:             else
 2468:                 ret = xmlStrEqual((xmlChar *)atom1->valuep,
 2469:                                   (xmlChar *)atom2->valuep);
 2470: 	    break;
 2471:         case XML_REGEXP_CHARVAL:
 2472: 	    ret = (atom1->codepoint == atom2->codepoint);
 2473: 	    break;
 2474: 	case XML_REGEXP_RANGES:
 2475: 	    /* too hard to do in the general case */
 2476: 	    ret = 0;
 2477: 	default:
 2478: 	    break;
 2479:     }
 2480:     return(ret);
 2481: }
 2482: 
 2483: /**
 2484:  * xmlFACompareAtoms:
 2485:  * @atom1:  an atom
 2486:  * @atom2:  an atom
 2487:  * @deep: if not set only compare string pointers
 2488:  *
 2489:  * Compares two atoms to check whether they intersect in some ways,
 2490:  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
 2491:  *
 2492:  * Returns 1 if yes and 0 otherwise
 2493:  */
 2494: static int
 2495: xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
 2496:     int ret = 1;
 2497: 
 2498:     if (atom1 == atom2)
 2499: 	return(1);
 2500:     if ((atom1 == NULL) || (atom2 == NULL))
 2501: 	return(0);
 2502: 
 2503:     if ((atom1->type == XML_REGEXP_ANYCHAR) ||
 2504:         (atom2->type == XML_REGEXP_ANYCHAR))
 2505: 	return(1);
 2506: 
 2507:     if (atom1->type > atom2->type) {
 2508: 	xmlRegAtomPtr tmp;
 2509: 	tmp = atom1;
 2510: 	atom1 = atom2;
 2511: 	atom2 = tmp;
 2512:     }
 2513:     if (atom1->type != atom2->type) {
 2514:         ret = xmlFACompareAtomTypes(atom1->type, atom2->type);
 2515: 	/* if they can't intersect at the type level break now */
 2516: 	if (ret == 0)
 2517: 	    return(0);
 2518:     }
 2519:     switch (atom1->type) {
 2520:         case XML_REGEXP_STRING:
 2521:             if (!deep)
 2522:                 ret = (atom1->valuep != atom2->valuep);
 2523:             else
 2524:                 ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
 2525:                                              (xmlChar *)atom2->valuep);
 2526: 	    break;
 2527:         case XML_REGEXP_EPSILON:
 2528: 	    goto not_determinist;
 2529:         case XML_REGEXP_CHARVAL:
 2530: 	    if (atom2->type == XML_REGEXP_CHARVAL) {
 2531: 		ret = (atom1->codepoint == atom2->codepoint);
 2532: 	    } else {
 2533: 	        ret = xmlRegCheckCharacter(atom2, atom1->codepoint);
 2534: 		if (ret < 0)
 2535: 		    ret = 1;
 2536: 	    }
 2537: 	    break;
 2538:         case XML_REGEXP_RANGES:
 2539: 	    if (atom2->type == XML_REGEXP_RANGES) {
 2540: 	        int i, j, res;
 2541: 		xmlRegRangePtr r1, r2;
 2542: 
 2543: 		/*
 2544: 		 * need to check that none of the ranges eventually matches
 2545: 		 */
 2546: 		for (i = 0;i < atom1->nbRanges;i++) {
 2547: 		    for (j = 0;j < atom2->nbRanges;j++) {
 2548: 			r1 = atom1->ranges[i];
 2549: 			r2 = atom2->ranges[j];
 2550: 			res = xmlFACompareRanges(r1, r2);
 2551: 			if (res == 1) {
 2552: 			    ret = 1;
 2553: 			    goto done;
 2554: 			}
 2555: 		    }
 2556: 		}
 2557: 		ret = 0;
 2558: 	    }
 2559: 	    break;
 2560: 	default:
 2561: 	    goto not_determinist;
 2562:     }
 2563: done:
 2564:     if (atom1->neg != atom2->neg) {
 2565:         ret = !ret;
 2566:     }
 2567:     if (ret == 0)
 2568:         return(0);
 2569: not_determinist:
 2570:     return(1);
 2571: }
 2572: 
 2573: /**
 2574:  * xmlFARecurseDeterminism:
 2575:  * @ctxt:  a regexp parser context
 2576:  *
 2577:  * Check whether the associated regexp is determinist,
 2578:  * should be called after xmlFAEliminateEpsilonTransitions()
 2579:  *
 2580:  */
 2581: static int
 2582: xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 2583: 	                 int to, xmlRegAtomPtr atom) {
 2584:     int ret = 1;
 2585:     int res;
 2586:     int transnr, nbTrans;
 2587:     xmlRegTransPtr t1;
 2588:     int deep = 1;
 2589: 
 2590:     if (state == NULL)
 2591: 	return(ret);
 2592: 
 2593:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2594:         deep = 0;
 2595: 
 2596:     /*
 2597:      * don't recurse on transitions potentially added in the course of
 2598:      * the elimination.
 2599:      */
 2600:     nbTrans = state->nbTrans;
 2601:     for (transnr = 0;transnr < nbTrans;transnr++) {
 2602: 	t1 = &(state->trans[transnr]);
 2603: 	/*
 2604: 	 * check transitions conflicting with the one looked at
 2605: 	 */
 2606: 	if (t1->atom == NULL) {
 2607: 	    if (t1->to < 0)
 2608: 		continue;
 2609: 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2610: 		                           to, atom);
 2611: 	    if (res == 0) {
 2612: 	        ret = 0;
 2613: 		/* t1->nd = 1; */
 2614: 	    }
 2615: 	    continue;
 2616: 	}
 2617: 	if (t1->to != to)
 2618: 	    continue;
 2619: 	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
 2620: 	    ret = 0;
 2621: 	    /* mark the transition as non-deterministic */
 2622: 	    t1->nd = 1;
 2623: 	}
 2624:     }
 2625:     return(ret);
 2626: }
 2627: 
 2628: /**
 2629:  * xmlFAComputesDeterminism:
 2630:  * @ctxt:  a regexp parser context
 2631:  *
 2632:  * Check whether the associated regexp is determinist,
 2633:  * should be called after xmlFAEliminateEpsilonTransitions()
 2634:  *
 2635:  */
 2636: static int
 2637: xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 2638:     int statenr, transnr;
 2639:     xmlRegStatePtr state;
 2640:     xmlRegTransPtr t1, t2, last;
 2641:     int i;
 2642:     int ret = 1;
 2643:     int deep = 1;
 2644: 
 2645: #ifdef DEBUG_REGEXP_GRAPH
 2646:     printf("xmlFAComputesDeterminism\n");
 2647:     xmlRegPrintCtxt(stdout, ctxt);
 2648: #endif
 2649:     if (ctxt->determinist != -1)
 2650: 	return(ctxt->determinist);
 2651: 
 2652:     if (ctxt->flags & AM_AUTOMATA_RNG)
 2653:         deep = 0;
 2654: 
 2655:     /*
 2656:      * First cleanup the automata removing cancelled transitions
 2657:      */
 2658:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2659: 	state = ctxt->states[statenr];
 2660: 	if (state == NULL)
 2661: 	    continue;
 2662: 	if (state->nbTrans < 2)
 2663: 	    continue;
 2664: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2665: 	    t1 = &(state->trans[transnr]);
 2666: 	    /*
 2667: 	     * Determinism checks in case of counted or all transitions
 2668: 	     * will have to be handled separately
 2669: 	     */
 2670: 	    if (t1->atom == NULL) {
 2671: 		/* t1->nd = 1; */
 2672: 		continue;
 2673: 	    }
 2674: 	    if (t1->to == -1) /* eliminated */
 2675: 		continue;
 2676: 	    for (i = 0;i < transnr;i++) {
 2677: 		t2 = &(state->trans[i]);
 2678: 		if (t2->to == -1) /* eliminated */
 2679: 		    continue;
 2680: 		if (t2->atom != NULL) {
 2681: 		    if (t1->to == t2->to) {
 2682:                         /*
 2683:                          * Here we use deep because we want to keep the
 2684:                          * transitions which indicate a conflict
 2685:                          */
 2686: 			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
 2687:                             (t1->counter == t2->counter) &&
 2688:                             (t1->count == t2->count))
 2689: 			    t2->to = -1; /* eliminated */
 2690: 		    }
 2691: 		}
 2692: 	    }
 2693: 	}
 2694:     }
 2695: 
 2696:     /*
 2697:      * Check for all states that there aren't 2 transitions
 2698:      * with the same atom and a different target.
 2699:      */
 2700:     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 2701: 	state = ctxt->states[statenr];
 2702: 	if (state == NULL)
 2703: 	    continue;
 2704: 	if (state->nbTrans < 2)
 2705: 	    continue;
 2706: 	last = NULL;
 2707: 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 2708: 	    t1 = &(state->trans[transnr]);
 2709: 	    /*
 2710: 	     * Determinism checks in case of counted or all transitions
 2711: 	     * will have to be handled separately
 2712: 	     */
 2713: 	    if (t1->atom == NULL) {
 2714: 		continue;
 2715: 	    }
 2716: 	    if (t1->to == -1) /* eliminated */
 2717: 		continue;
 2718: 	    for (i = 0;i < transnr;i++) {
 2719: 		t2 = &(state->trans[i]);
 2720: 		if (t2->to == -1) /* eliminated */
 2721: 		    continue;
 2722: 		if (t2->atom != NULL) {
 2723:                     /*
 2724:                      * But here we don't use deep because we want to
 2725:                      * find transitions which indicate a conflict
 2726:                      */
 2727: 		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
 2728: 			ret = 0;
 2729: 			/* mark the transitions as non-deterministic ones */
 2730: 			t1->nd = 1;
 2731: 			t2->nd = 1;
 2732: 			last = t1;
 2733: 		    }
 2734: 		} else if (t1->to != -1) {
 2735: 		    /*
 2736: 		     * do the closure in case of remaining specific
 2737: 		     * epsilon transitions like choices or all
 2738: 		     */
 2739: 		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
 2740: 						   t2->to, t2->atom);
 2741: 		    /* don't shortcut the computation so all non deterministic
 2742: 		       transition get marked down
 2743: 		    if (ret == 0)
 2744: 			return(0);
 2745: 		     */
 2746: 		    if (ret == 0) {
 2747: 			t1->nd = 1;
 2748: 			/* t2->nd = 1; */
 2749: 			last = t1;
 2750: 		    }
 2751: 		}
 2752: 	    }
 2753: 	    /* don't shortcut the computation so all non deterministic
 2754: 	       transition get marked down
 2755: 	    if (ret == 0)
 2756: 		break; */
 2757: 	}
 2758: 
 2759: 	/*
 2760: 	 * mark specifically the last non-deterministic transition
 2761: 	 * from a state since there is no need to set-up rollback
 2762: 	 * from it
 2763: 	 */
 2764: 	if (last != NULL) {
 2765: 	    last->nd = 2;
 2766: 	}
 2767: 
 2768: 	/* don't shortcut the computation so all non deterministic
 2769: 	   transition get marked down
 2770: 	if (ret == 0)
 2771: 	    break; */
 2772:     }
 2773: 
 2774:     ctxt->determinist = ret;
 2775:     return(ret);
 2776: }
 2777: 
 2778: /************************************************************************
 2779:  * 									*
 2780:  *	Routines to check input against transition atoms		*
 2781:  * 									*
 2782:  ************************************************************************/
 2783: 
 2784: static int
 2785: xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
 2786: 	                  int start, int end, const xmlChar *blockName) {
 2787:     int ret = 0;
 2788: 
 2789:     switch (type) {
 2790:         case XML_REGEXP_STRING:
 2791:         case XML_REGEXP_SUBREG:
 2792:         case XML_REGEXP_RANGES:
 2793:         case XML_REGEXP_EPSILON:
 2794: 	    return(-1);
 2795:         case XML_REGEXP_ANYCHAR:
 2796: 	    ret = ((codepoint != '\n') && (codepoint != '\r'));
 2797: 	    break;
 2798:         case XML_REGEXP_CHARVAL:
 2799: 	    ret = ((codepoint >= start) && (codepoint <= end));
 2800: 	    break;
 2801:         case XML_REGEXP_NOTSPACE:
 2802: 	    neg = !neg;
 2803:         case XML_REGEXP_ANYSPACE:
 2804: 	    ret = ((codepoint == '\n') || (codepoint == '\r') ||
 2805: 		   (codepoint == '\t') || (codepoint == ' '));
 2806: 	    break;
 2807:         case XML_REGEXP_NOTINITNAME:
 2808: 	    neg = !neg;
 2809:         case XML_REGEXP_INITNAME:
 2810: 	    ret = (IS_LETTER(codepoint) || 
 2811: 		   (codepoint == '_') || (codepoint == ':'));
 2812: 	    break;
 2813:         case XML_REGEXP_NOTNAMECHAR:
 2814: 	    neg = !neg;
 2815:         case XML_REGEXP_NAMECHAR:
 2816: 	    ret = (IS_LETTER(codepoint) || IS_DIGIT(codepoint) ||
 2817: 		   (codepoint == '.') || (codepoint == '-') ||
 2818: 		   (codepoint == '_') || (codepoint == ':') ||
 2819: 		   IS_COMBINING(codepoint) || IS_EXTENDER(codepoint));
 2820: 	    break;
 2821:         case XML_REGEXP_NOTDECIMAL:
 2822: 	    neg = !neg;
 2823:         case XML_REGEXP_DECIMAL:
 2824: 	    ret = xmlUCSIsCatNd(codepoint);
 2825: 	    break;
 2826:         case XML_REGEXP_REALCHAR:
 2827: 	    neg = !neg;
 2828:         case XML_REGEXP_NOTREALCHAR:
 2829: 	    ret = xmlUCSIsCatP(codepoint);
 2830: 	    if (ret == 0)
 2831: 		ret = xmlUCSIsCatZ(codepoint);
 2832: 	    if (ret == 0)
 2833: 		ret = xmlUCSIsCatC(codepoint);
 2834: 	    break;
 2835:         case XML_REGEXP_LETTER:
 2836: 	    ret = xmlUCSIsCatL(codepoint);
 2837: 	    break;
 2838:         case XML_REGEXP_LETTER_UPPERCASE:
 2839: 	    ret = xmlUCSIsCatLu(codepoint);
 2840: 	    break;
 2841:         case XML_REGEXP_LETTER_LOWERCASE:
 2842: 	    ret = xmlUCSIsCatLl(codepoint);
 2843: 	    break;
 2844:         case XML_REGEXP_LETTER_TITLECASE:
 2845: 	    ret = xmlUCSIsCatLt(codepoint);
 2846: 	    break;
 2847:         case XML_REGEXP_LETTER_MODIFIER:
 2848: 	    ret = xmlUCSIsCatLm(codepoint);
 2849: 	    break;
 2850:         case XML_REGEXP_LETTER_OTHERS:
 2851: 	    ret = xmlUCSIsCatLo(codepoint);
 2852: 	    break;
 2853:         case XML_REGEXP_MARK:
 2854: 	    ret = xmlUCSIsCatM(codepoint);
 2855: 	    break;
 2856:         case XML_REGEXP_MARK_NONSPACING:
 2857: 	    ret = xmlUCSIsCatMn(codepoint);
 2858: 	    break;
 2859:         case XML_REGEXP_MARK_SPACECOMBINING:
 2860: 	    ret = xmlUCSIsCatMc(codepoint);
 2861: 	    break;
 2862:         case XML_REGEXP_MARK_ENCLOSING:
 2863: 	    ret = xmlUCSIsCatMe(codepoint);
 2864: 	    break;
 2865:         case XML_REGEXP_NUMBER:
 2866: 	    ret = xmlUCSIsCatN(codepoint);
 2867: 	    break;
 2868:         case XML_REGEXP_NUMBER_DECIMAL:
 2869: 	    ret = xmlUCSIsCatNd(codepoint);
 2870: 	    break;
 2871:         case XML_REGEXP_NUMBER_LETTER:
 2872: 	    ret = xmlUCSIsCatNl(codepoint);
 2873: 	    break;
 2874:         case XML_REGEXP_NUMBER_OTHERS:
 2875: 	    ret = xmlUCSIsCatNo(codepoint);
 2876: 	    break;
 2877:         case XML_REGEXP_PUNCT:
 2878: 	    ret = xmlUCSIsCatP(codepoint);
 2879: 	    break;
 2880:         case XML_REGEXP_PUNCT_CONNECTOR:
 2881: 	    ret = xmlUCSIsCatPc(codepoint);
 2882: 	    break;
 2883:         case XML_REGEXP_PUNCT_DASH:
 2884: 	    ret = xmlUCSIsCatPd(codepoint);
 2885: 	    break;
 2886:         case XML_REGEXP_PUNCT_OPEN:
 2887: 	    ret = xmlUCSIsCatPs(codepoint);
 2888: 	    break;
 2889:         case XML_REGEXP_PUNCT_CLOSE:
 2890: 	    ret = xmlUCSIsCatPe(codepoint);
 2891: 	    break;
 2892:         case XML_REGEXP_PUNCT_INITQUOTE:
 2893: 	    ret = xmlUCSIsCatPi(codepoint);
 2894: 	    break;
 2895:         case XML_REGEXP_PUNCT_FINQUOTE:
 2896: 	    ret = xmlUCSIsCatPf(codepoint);
 2897: 	    break;
 2898:         case XML_REGEXP_PUNCT_OTHERS:
 2899: 	    ret = xmlUCSIsCatPo(codepoint);
 2900: 	    break;
 2901:         case XML_REGEXP_SEPAR:
 2902: 	    ret = xmlUCSIsCatZ(codepoint);
 2903: 	    break;
 2904:         case XML_REGEXP_SEPAR_SPACE:
 2905: 	    ret = xmlUCSIsCatZs(codepoint);
 2906: 	    break;
 2907:         case XML_REGEXP_SEPAR_LINE:
 2908: 	    ret = xmlUCSIsCatZl(codepoint);
 2909: 	    break;
 2910:         case XML_REGEXP_SEPAR_PARA:
 2911: 	    ret = xmlUCSIsCatZp(codepoint);
 2912: 	    break;
 2913:         case XML_REGEXP_SYMBOL:
 2914: 	    ret = xmlUCSIsCatS(codepoint);
 2915: 	    break;
 2916:         case XML_REGEXP_SYMBOL_MATH:
 2917: 	    ret = xmlUCSIsCatSm(codepoint);
 2918: 	    break;
 2919:         case XML_REGEXP_SYMBOL_CURRENCY:
 2920: 	    ret = xmlUCSIsCatSc(codepoint);
 2921: 	    break;
 2922:         case XML_REGEXP_SYMBOL_MODIFIER:
 2923: 	    ret = xmlUCSIsCatSk(codepoint);
 2924: 	    break;
 2925:         case XML_REGEXP_SYMBOL_OTHERS:
 2926: 	    ret = xmlUCSIsCatSo(codepoint);
 2927: 	    break;
 2928:         case XML_REGEXP_OTHER:
 2929: 	    ret = xmlUCSIsCatC(codepoint);
 2930: 	    break;
 2931:         case XML_REGEXP_OTHER_CONTROL:
 2932: 	    ret = xmlUCSIsCatCc(codepoint);
 2933: 	    break;
 2934:         case XML_REGEXP_OTHER_FORMAT:
 2935: 	    ret = xmlUCSIsCatCf(codepoint);
 2936: 	    break;
 2937:         case XML_REGEXP_OTHER_PRIVATE:
 2938: 	    ret = xmlUCSIsCatCo(codepoint);
 2939: 	    break;
 2940:         case XML_REGEXP_OTHER_NA:
 2941: 	    /* ret = xmlUCSIsCatCn(codepoint); */
 2942: 	    /* Seems it doesn't exist anymore in recent Unicode releases */
 2943: 	    ret = 0;
 2944: 	    break;
 2945:         case XML_REGEXP_BLOCK_NAME:
 2946: 	    ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
 2947: 	    break;
 2948:     }
 2949:     if (neg)
 2950: 	return(!ret);
 2951:     return(ret);
 2952: }
 2953: 
 2954: static int
 2955: xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
 2956:     int i, ret = 0;
 2957:     xmlRegRangePtr range;
 2958: 
 2959:     if ((atom == NULL) || (!IS_CHAR(codepoint)))
 2960: 	return(-1);
 2961: 
 2962:     switch (atom->type) {
 2963:         case XML_REGEXP_SUBREG:
 2964:         case XML_REGEXP_EPSILON:
 2965: 	    return(-1);
 2966:         case XML_REGEXP_CHARVAL:
 2967:             return(codepoint == atom->codepoint);
 2968:         case XML_REGEXP_RANGES: {
 2969: 	    int accept = 0;
 2970: 
 2971: 	    for (i = 0;i < atom->nbRanges;i++) {
 2972: 		range = atom->ranges[i];
 2973: 		if (range->neg == 2) {
 2974: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2975: 						0, range->start, range->end,
 2976: 						range->blockName);
 2977: 		    if (ret != 0)
 2978: 			return(0); /* excluded char */
 2979: 		} else if (range->neg) {
 2980: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2981: 						0, range->start, range->end,
 2982: 						range->blockName);
 2983: 		    if (ret == 0)
 2984: 		        accept = 1;
 2985: 		    else
 2986: 		        return(0);
 2987: 		} else {
 2988: 		    ret = xmlRegCheckCharacterRange(range->type, codepoint,
 2989: 						0, range->start, range->end,
 2990: 						range->blockName);
 2991: 		    if (ret != 0)
 2992: 			accept = 1; /* might still be excluded */
 2993: 		}
 2994: 	    }
 2995: 	    return(accept);
 2996: 	}
 2997:         case XML_REGEXP_STRING:
 2998: 	    printf("TODO: XML_REGEXP_STRING\n");
 2999: 	    return(-1);
 3000:         case XML_REGEXP_ANYCHAR:
 3001:         case XML_REGEXP_ANYSPACE:
 3002:         case XML_REGEXP_NOTSPACE:
 3003:         case XML_REGEXP_INITNAME:
 3004:         case XML_REGEXP_NOTINITNAME:
 3005:         case XML_REGEXP_NAMECHAR:
 3006:         case XML_REGEXP_NOTNAMECHAR:
 3007:         case XML_REGEXP_DECIMAL:
 3008:         case XML_REGEXP_NOTDECIMAL:
 3009:         case XML_REGEXP_REALCHAR:
 3010:         case XML_REGEXP_NOTREALCHAR:
 3011:         case XML_REGEXP_LETTER:
 3012:         case XML_REGEXP_LETTER_UPPERCASE:
 3013:         case XML_REGEXP_LETTER_LOWERCASE:
 3014:         case XML_REGEXP_LETTER_TITLECASE:
 3015:         case XML_REGEXP_LETTER_MODIFIER:
 3016:         case XML_REGEXP_LETTER_OTHERS:
 3017:         case XML_REGEXP_MARK:
 3018:         case XML_REGEXP_MARK_NONSPACING:
 3019:         case XML_REGEXP_MARK_SPACECOMBINING:
 3020:         case XML_REGEXP_MARK_ENCLOSING:
 3021:         case XML_REGEXP_NUMBER:
 3022:         case XML_REGEXP_NUMBER_DECIMAL:
 3023:         case XML_REGEXP_NUMBER_LETTER:
 3024:         case XML_REGEXP_NUMBER_OTHERS:
 3025:         case XML_REGEXP_PUNCT:
 3026:         case XML_REGEXP_PUNCT_CONNECTOR:
 3027:         case XML_REGEXP_PUNCT_DASH:
 3028:         case XML_REGEXP_PUNCT_OPEN:
 3029:         case XML_REGEXP_PUNCT_CLOSE:
 3030:         case XML_REGEXP_PUNCT_INITQUOTE:
 3031:         case XML_REGEXP_PUNCT_FINQUOTE:
 3032:         case XML_REGEXP_PUNCT_OTHERS:
 3033:         case XML_REGEXP_SEPAR:
 3034:         case XML_REGEXP_SEPAR_SPACE:
 3035:         case XML_REGEXP_SEPAR_LINE:
 3036:         case XML_REGEXP_SEPAR_PARA:
 3037:         case XML_REGEXP_SYMBOL:
 3038:         case XML_REGEXP_SYMBOL_MATH:
 3039:         case XML_REGEXP_SYMBOL_CURRENCY:
 3040:         case XML_REGEXP_SYMBOL_MODIFIER:
 3041:         case XML_REGEXP_SYMBOL_OTHERS:
 3042:         case XML_REGEXP_OTHER:
 3043:         case XML_REGEXP_OTHER_CONTROL:
 3044:         case XML_REGEXP_OTHER_FORMAT:
 3045:         case XML_REGEXP_OTHER_PRIVATE:
 3046:         case XML_REGEXP_OTHER_NA:
 3047: 	case XML_REGEXP_BLOCK_NAME:
 3048: 	    ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
 3049: 		                            (const xmlChar *)atom->valuep);
 3050: 	    if (atom->neg)
 3051: 		ret = !ret;
 3052: 	    break;
 3053:     }
 3054:     return(ret);
 3055: }
 3056: 
 3057: /************************************************************************
 3058:  * 									*
 3059:  *	Saving and restoring state of an execution context		*
 3060:  * 									*
 3061:  ************************************************************************/
 3062: 
 3063: #ifdef DEBUG_REGEXP_EXEC
 3064: static void
 3065: xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
 3066:     printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
 3067:     if (exec->inputStack != NULL) {
 3068: 	int i;
 3069: 	printf(": ");
 3070: 	for (i = 0;(i < 3) && (i < exec->inputStackNr);i++)
 3071: 	    printf("%s ", (const char *)
 3072: 	           exec->inputStack[exec->inputStackNr - (i + 1)].value);
 3073:     } else {
 3074: 	printf(": %s", &(exec->inputString[exec->index]));
 3075:     }
 3076:     printf("\n");
 3077: }
 3078: #endif
 3079: 
 3080: static void
 3081: xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
 3082: #ifdef DEBUG_REGEXP_EXEC
 3083:     printf("saving ");
 3084:     exec->transno++;
 3085:     xmlFARegDebugExec(exec);
 3086:     exec->transno--;
 3087: #endif
 3088: #ifdef MAX_PUSH
 3089:     if (exec->nbPush > MAX_PUSH) {
 3090:         return;
 3091:     }
 3092:     exec->nbPush++;
 3093: #endif
 3094: 
 3095:     if (exec->maxRollbacks == 0) {
 3096: 	exec->maxRollbacks = 4;
 3097: 	exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
 3098: 		                             sizeof(xmlRegExecRollback));
 3099: 	if (exec->rollbacks == NULL) {
 3100: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3101: 	    exec->maxRollbacks = 0;
 3102: 	    return;
 3103: 	}
 3104: 	memset(exec->rollbacks, 0,
 3105: 	       exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3106:     } else if (exec->nbRollbacks >= exec->maxRollbacks) {
 3107: 	xmlRegExecRollback *tmp;
 3108: 	int len = exec->maxRollbacks;
 3109: 
 3110: 	exec->maxRollbacks *= 2;
 3111: 	tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
 3112: 			exec->maxRollbacks * sizeof(xmlRegExecRollback));
 3113: 	if (tmp == NULL) {
 3114: 	    xmlRegexpErrMemory(NULL, "saving regexp");
 3115: 	    exec->maxRollbacks /= 2;
 3116: 	    return;
 3117: 	}
 3118: 	exec->rollbacks = tmp;
 3119: 	tmp = &exec->rollbacks[len];
 3120: 	memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
 3121:     }
 3122:     exec->rollbacks[exec->nbRollbacks].state = exec->state;
 3123:     exec->rollbacks[exec->nbRollbacks].index = exec->index;
 3124:     exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1;
 3125:     if (exec->comp->nbCounters > 0) {
 3126: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3127: 	    exec->rollbacks[exec->nbRollbacks].counts = (int *)
 3128: 		xmlMalloc(exec->comp->nbCounters * sizeof(int));
 3129: 	    if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3130: 		xmlRegexpErrMemory(NULL, "saving regexp");
 3131: 		exec->status = -5;
 3132: 		return;
 3133: 	    }
 3134: 	}
 3135: 	memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
 3136: 	       exec->comp->nbCounters * sizeof(int));
 3137:     }
 3138:     exec->nbRollbacks++;
 3139: }
 3140: 
 3141: static void
 3142: xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
 3143:     if (exec->nbRollbacks <= 0) {
 3144: 	exec->status = -1;
 3145: #ifdef DEBUG_REGEXP_EXEC
 3146: 	printf("rollback failed on empty stack\n");
 3147: #endif
 3148: 	return;
 3149:     }
 3150:     exec->nbRollbacks--;
 3151:     exec->state = exec->rollbacks[exec->nbRollbacks].state;
 3152:     exec->index = exec->rollbacks[exec->nbRollbacks].index;
 3153:     exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
 3154:     if (exec->comp->nbCounters > 0) {
 3155: 	if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
 3156: 	    fprintf(stderr, "exec save: allocation failed");
 3157: 	    exec->status = -6;
 3158: 	    return;
 3159: 	}
 3160: 	memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
 3161: 	       exec->comp->nbCounters * sizeof(int));
 3162:     }
 3163: 
 3164: #ifdef DEBUG_REGEXP_EXEC
 3165:     printf("restored ");
 3166:     xmlFARegDebugExec(exec);
 3167: #endif
 3168: }
 3169: 
 3170: /************************************************************************
 3171:  * 									*
 3172:  *	Verifier, running an input against a compiled regexp		*
 3173:  * 									*
 3174:  ************************************************************************/
 3175: 
 3176: static int
 3177: xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
           /*
            * Match the NUL-terminated string @content against the compiled
            * automaton @comp, walking the transitions of the current state
            * and backtracking through the rollback records pushed by
            * xmlFARegExecSave().
            *
            * Return value, as computed after the "error" label below:
            *   1   the loop ended with exec->status == 0
            *   0   exec->status == -1 and nbPush did not exceed MAX_PUSH
            *  -1   exec->status == -1 with nbPush > MAX_PUSH, or the
            *       counter array could not be allocated
            *  any other exec->status value is returned unchanged (-2 and
            *  -4 are set in this function).
            */
 3178:     xmlRegExecCtxt execval;
 3179:     xmlRegExecCtxtPtr exec = &execval;
 3180:     int ret, codepoint = 0, len, deter;
 3181: 
 3182:     exec->inputString = content;
 3183:     exec->index = 0;
 3184:     exec->nbPush = 0;
 3185:     exec->determinist = 1;
 3186:     exec->maxRollbacks = 0;
 3187:     exec->nbRollbacks = 0;
 3188:     exec->rollbacks = NULL;
 3189:     exec->status = 0;
 3190:     exec->comp = comp;
 3191:     exec->state = comp->states[0];
 3192:     exec->transno = 0;
 3193:     exec->transcount = 0;
 3194:     exec->inputStack = NULL;
 3195:     exec->inputStackMax = 0;
 3196:     if (comp->nbCounters > 0) {
 3197: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
 3198: 	if (exec->counts == NULL) {
 3199: 	    xmlRegexpErrMemory(NULL, "running regexp");
 3200: 	    return(-1);
 3201: 	}
 3202:         memset(exec->counts, 0, comp->nbCounters * sizeof(int));
 3203:     } else
 3204: 	exec->counts = NULL;
           /*
            * Keep going while no error/failure status is set and either
            * some input is left or the current state is not a final one.
            */
 3205:     while ((exec->status == 0) &&
 3206: 	   ((exec->inputString[exec->index] != 0) ||
 3207: 	    ((exec->state != NULL) &&
 3208: 	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
 3209: 	xmlRegTransPtr trans;
 3210: 	xmlRegAtomPtr atom;
 3211: 
 3212: 	/*
 3213: 	 * If end of input on non-terminal state, rollback, however we may
 3214: 	 * still have epsilon like transition for counted transitions
 3215: 	 * on counters, in that case don't break too early.  Additionally,
 3216: 	 * if we are working on a range like "AB{0,2}", where B is not present,
 3217: 	 * we don't want to break.
 3218: 	 */
 3219: 	len = 1;
 3220: 	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL)) {
 3221: 	    /*
 3222: 	     * if there is a transition, we must check if
 3223: 	     *  atom allows minOccurs of 0
 3224: 	     */
 3225: 	    if (exec->transno < exec->state->nbTrans) {
 3226: 	        trans = &exec->state->trans[exec->transno];
 3227: 		if (trans->to >=0) {
 3228: 		    atom = trans->atom;
 3229: 		    if (!((atom->min == 0) && (atom->max > 0)))
 3230: 		        goto rollback;
 3231: 		}
 3232: 	    } else
 3233: 	        goto rollback;
 3234: 	}
 3235: 
 3236: 	exec->transcount = 0;
       	/* try the remaining transitions of the current state in order */
 3237: 	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
 3238: 	    trans = &exec->state->trans[exec->transno];
 3239: 	    if (trans->to < 0)
 3240: 		continue;
 3241: 	    atom = trans->atom;
 3242: 	    ret = 0;
 3243: 	    deter = 1;
 3244: 	    if (trans->count >= 0) {
 3245: 		int count;
 3246: 		xmlRegCounterPtr counter;
 3247: 
 3248: 		if (exec->counts == NULL) {
 3249: 		    exec->status = -1;
 3250: 		    goto error;
 3251: 		}
 3252: 		/*
 3253: 		 * A counted transition.
 3254: 		 */
 3255: 
 3256: 		count = exec->counts[trans->count];
 3257: 		counter = &exec->comp->counters[trans->count];
 3258: #ifdef DEBUG_REGEXP_EXEC
 3259: 		printf("testing count %d: val %d, min %d, max %d\n",
 3260: 		       trans->count, count, counter->min,  counter->max);
 3261: #endif
       		/* usable only while the counter lies within [min, max] */
 3262: 		ret = ((count >= counter->min) && (count <= counter->max));
 3263: 		if ((ret) && (counter->min != counter->max))
 3264: 		    deter = 0;
 3265: 	    } else if (atom == NULL) {
 3266: 		fprintf(stderr, "epsilon transition left at runtime\n");
 3267: 		exec->status = -2;
 3268: 		break;
 3269: 	    } else if (exec->inputString[exec->index] != 0) {
 3270:                 codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
 3271: 		ret = xmlRegCheckCharacter(atom, codepoint);
 3272: 		if ((ret == 1) && (atom->min >= 0) && (atom->max > 0)) {
 3273: 		    xmlRegStatePtr to = comp->states[trans->to];
 3274: 
 3275: 		    /*
 3276: 		     * this is a multiple input sequence
 3277: 		     * If there is a counter associated increment it now.
 3278: 		     * before potentially saving and rollback
 3279: 		     * do not increment if the counter is already over the
 3280: 		     * maximum limit in which case get to next transition
 3281: 		     */
 3282: 		    if (trans->counter >= 0) {
 3283: 			xmlRegCounterPtr counter;
 3284: 
 3285: 			if ((exec->counts == NULL) ||
 3286: 			    (exec->comp == NULL) ||
 3287: 			    (exec->comp->counters == NULL)) {
 3288: 			    exec->status = -1;
 3289: 			    goto error;
 3290: 			}
 3291: 			counter = &exec->comp->counters[trans->counter];
 3292: 			if (exec->counts[trans->counter] >= counter->max)
 3293: 			    continue; /* for loop on transitions */
 3294: 
 3295: #ifdef DEBUG_REGEXP_EXEC
 3296: 			printf("Increasing count %d\n", trans->counter);
 3297: #endif
 3298: 			exec->counts[trans->counter]++;
 3299: 		    }
 3300: 		    if (exec->state->nbTrans > exec->transno + 1) {
 3301: 			xmlFARegExecSave(exec);
 3302: 		    }
 3303: 		    exec->transcount = 1;
 3304: 		    do {
 3305: 			/*
 3306: 			 * Try to progress as much as possible on the input
 3307: 			 */
 3308: 			if (exec->transcount == atom->max) {
 3309: 			    break;
 3310: 			}
 3311: 			exec->index += len;
 3312: 			/*
 3313: 			 * End of input: stop here
 3314: 			 */
 3315: 			if (exec->inputString[exec->index] == 0) {
 3316: 			    exec->index -= len;
 3317: 			    break;
 3318: 			}
 3319: 			if (exec->transcount >= atom->min) {
 3320: 			    int transno = exec->transno;
 3321: 			    xmlRegStatePtr state = exec->state;
 3322: 
 3323: 			    /*
 3324: 			     * The transition is acceptable save it
 3325: 			     */
 3326: 			    exec->transno = -1; /* trick: the save records transno + 1, i.e. transition 0 of the target state */
 3327: 			    exec->state = to;
 3328: 			    xmlFARegExecSave(exec);
 3329: 			    exec->transno = transno;
 3330: 			    exec->state = state;
 3331: 			}
 3332: 			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
 3333: 				              len);
 3334: 			ret = xmlRegCheckCharacter(atom, codepoint);
 3335: 			exec->transcount++;
 3336: 		    } while (ret == 1);
 3337: 		    if (exec->transcount < atom->min)
 3338: 			ret = 0;
 3339: 
 3340: 		    /*
 3341: 		     * If the last check failed but one transition was found
 3342: 		     * possible, rollback
 3343: 		     */
 3344: 		    if (ret < 0)
 3345: 			ret = 0;
 3346: 		    if (ret == 0) {
 3347: 			goto rollback;
 3348: 		    }
       		    /*
       		     * Undo the early increment done above: the common
       		     * ret == 1 path below increments the counter again.
       		     */
 3349: 		    if (trans->counter >= 0) {
 3350: 			if (exec->counts == NULL) {
 3351: 			    exec->status = -1;
 3352: 			    goto error;
 3353: 			}
 3354: #ifdef DEBUG_REGEXP_EXEC
 3355: 			printf("Decreasing count %d\n", trans->counter);
 3356: #endif
 3357: 			exec->counts[trans->counter]--;
 3358: 		    }
 3359: 		} else if ((ret == 0) && (atom->min == 0) && (atom->max > 0)) {
 3360: 		    /*
 3361: 		     * we don't match on the codepoint, but minOccurs of 0
 3362: 		     * says that's ok.  Setting len to 0 inhibits stepping
 3363: 		     * over the codepoint.
 3364: 		     */
 3365: 		    exec->transcount = 1;
 3366: 		    len = 0;
 3367: 		    ret = 1;
 3368: 		}
 3369: 	    } else if ((atom->min == 0) && (atom->max > 0)) {
 3370: 	        /* another spot to match when minOccurs is 0 */
 3371: 		exec->transcount = 1;
 3372: 		len = 0;
 3373: 		ret = 1;
 3374: 	    }
 3375: 	    if (ret == 1) {
       		/*
       		 * The transition is taken.  Record a rollback point first
       		 * when it is flagged non-deterministic (nd == 1) or when it
       		 * is a counted transition with min != max that is not the
       		 * last transition of the state.
       		 */
 3376: 		if ((trans->nd == 1) ||
 3377: 		    ((trans->count >= 0) && (deter == 0) &&
 3378: 		     (exec->state->nbTrans > exec->transno + 1))) {
 3379: #ifdef DEBUG_REGEXP_EXEC
 3380: 		    if (trans->nd == 1)
 3381: 		        printf("Saving on nd transition atom %d for %c at %d\n",
 3382: 			       trans->atom->no, codepoint, exec->index);
 3383: 		    else
 3384: 		        printf("Saving on counted transition count %d for %c at %d\n",
 3385: 			       trans->count, codepoint, exec->index);
 3386: #endif
 3387: 		    xmlFARegExecSave(exec);
 3388: 		}
 3389: 		if (trans->counter >= 0) {
 3390: 		    xmlRegCounterPtr counter;
 3391: 
 3392:                     /* make sure we don't go over the counter maximum value */
 3393: 		    if ((exec->counts == NULL) ||
 3394: 			(exec->comp == NULL) ||
 3395: 			(exec->comp->counters == NULL)) {
 3396: 			exec->status = -1;
 3397: 			goto error;
 3398: 		    }
 3399: 		    counter = &exec->comp->counters[trans->counter];
 3400: 		    if (exec->counts[trans->counter] >= counter->max)
 3401: 			continue; /* for loop on transitions */
 3402: #ifdef DEBUG_REGEXP_EXEC
 3403: 		    printf("Increasing count %d\n", trans->counter);
 3404: #endif
 3405: 		    exec->counts[trans->counter]++;
 3406: 		}
 3407: 		if ((trans->count >= 0) &&
 3408: 		    (trans->count < REGEXP_ALL_COUNTER)) {
 3409: 		    if (exec->counts == NULL) {
 3410: 		        exec->status = -1;
 3411: 			goto error;
 3412: 		    }
 3413: #ifdef DEBUG_REGEXP_EXEC
 3414: 		    printf("resetting count %d on transition\n",
 3415: 		           trans->count);
 3416: #endif
 3417: 		    exec->counts[trans->count] = 0;
 3418: 		}
 3419: #ifdef DEBUG_REGEXP_EXEC
 3420: 		printf("entering state %d\n", trans->to);
 3421: #endif
 3422: 		exec->state = comp->states[trans->to];
 3423: 		exec->transno = 0;
 3424: 		if (trans->atom != NULL) {
 3425: 		    exec->index += len;
 3426: 		}
 3427: 		goto progress;
 3428: 	    } else if (ret < 0) {
 3429: 		exec->status = -4;
 3430: 		break;
 3431: 	    }
 3432: 	}
       	/*
       	 * Reached when the transition loop ended without taking a
       	 * transition; the rollback label is also the target of the
       	 * "goto rollback" statements above.
       	 */
 3433: 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 3434: rollback:
 3435: 	    /*
 3436: 	     * Failed to find a way out
 3437: 	     */
 3438: 	    exec->determinist = 0;
 3439: #ifdef DEBUG_REGEXP_EXEC
 3440: 	    printf("rollback from state %d on %d:%c\n", exec->state->no,
 3441: 	           codepoint,codepoint);
 3442: #endif
 3443: 	    xmlFARegExecRollBack(exec);
 3444: 	}
 3445: progress:
 3446: 	continue;
 3447:     }
 3448: error:
           /*
            * Common exit: release the rollback records (every slot up to
            * maxRollbacks is inspected, unused ones hold a NULL counts
            * pointer) and the counter array, then map the status to the
            * return value.
            */
 3449:     if (exec->rollbacks != NULL) {
 3450: 	if (exec->counts != NULL) {
 3451: 	    int i;
 3452: 
 3453: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3454: 		if (exec->rollbacks[i].counts != NULL)
 3455: 		    xmlFree(exec->rollbacks[i].counts);
 3456: 	}
 3457: 	xmlFree(exec->rollbacks);
 3458:     }
 3459:     if (exec->counts != NULL)
 3460: 	xmlFree(exec->counts);
 3461:     if (exec->status == 0)
 3462: 	return(1);
 3463:     if (exec->status == -1) {
       	/* -1 only if the push budget was exceeded, otherwise "no match" */
 3464: 	if (exec->nbPush > MAX_PUSH)
 3465: 	    return(-1);
 3466: 	return(0);
 3467:     }
 3468:     return(exec->status);
 3469: }
 3470: 
 3471: /************************************************************************
 3472:  * 									*
 3473:  *	Progressive interface to the verifier one atom at a time	*
 3474:  * 									*
 3475:  ************************************************************************/
 3476: #ifdef DEBUG_ERR
 3477: static void testerr(xmlRegExecCtxtPtr exec);
 3478: #endif
 3479: 
 3480: /**
 3481:  * xmlRegNewExecCtxt:
 3482:  * @comp: a precompiled regular expression
 3483:  * @callback: a callback function used for handling progresses in the
 3484:  *            automata matching phase
 3485:  * @data: the context data associated to the callback in this context
 3486:  *
 3487:  * Build a context used for progressive evaluation of a regexp.
 3488:  *
 3489:  * Returns the new context
 3490:  */
 3491: xmlRegExecCtxtPtr
 3492: xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
 3493:     xmlRegExecCtxtPtr exec;
 3494: 
 3495:     if (comp == NULL)
 3496: 	return(NULL);
 3497:     if ((comp->compact == NULL) && (comp->states == NULL))
 3498:         return(NULL);
 3499:     exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
 3500:     if (exec == NULL) {
 3501: 	xmlRegexpErrMemory(NULL, "creating execution context");
 3502: 	return(NULL);
 3503:     }
 3504:     memset(exec, 0, sizeof(xmlRegExecCtxt));
 3505:     exec->inputString = NULL;
 3506:     exec->index = 0;
 3507:     exec->determinist = 1;
 3508:     exec->maxRollbacks = 0;
 3509:     exec->nbRollbacks = 0;
 3510:     exec->rollbacks = NULL;
 3511:     exec->status = 0;
 3512:     exec->comp = comp;
 3513:     if (comp->compact == NULL)
 3514: 	exec->state = comp->states[0];
 3515:     exec->transno = 0;
 3516:     exec->transcount = 0;
 3517:     exec->callback = callback;
 3518:     exec->data = data;
 3519:     if (comp->nbCounters > 0) {
 3520:         /*
 3521: 	 * For error handling, exec->counts is allocated twice the size
 3522: 	 * the second half is used to store the data in case of rollback
 3523: 	 */
 3524: 	exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int)
 3525: 	                                 * 2);
 3526: 	if (exec->counts == NULL) {
 3527: 	    xmlRegexpErrMemory(NULL, "creating execution context");
 3528: 	    xmlFree(exec);
 3529: 	    return(NULL);
 3530: 	}
 3531:         memset(exec->counts, 0, comp->nbCounters * sizeof(int) * 2);
 3532: 	exec->errCounts = &exec->counts[comp->nbCounters];
 3533:     } else {
 3534: 	exec->counts = NULL;
 3535: 	exec->errCounts = NULL;
 3536:     }
 3537:     exec->inputStackMax = 0;
 3538:     exec->inputStackNr = 0;
 3539:     exec->inputStack = NULL;
 3540:     exec->errStateNo = -1;
 3541:     exec->errString = NULL;
 3542:     exec->nbPush = 0;
 3543:     return(exec);
 3544: }
 3545: 
 3546: /**
 3547:  * xmlRegFreeExecCtxt:
 3548:  * @exec: a regular expression evaulation context
 3549:  *
 3550:  * Free the structures associated to a regular expression evaulation context.
 3551:  */
 3552: void
 3553: xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
 3554:     if (exec == NULL)
 3555: 	return;
 3556: 
 3557:     if (exec->rollbacks != NULL) {
 3558: 	if (exec->counts != NULL) {
 3559: 	    int i;
 3560: 
 3561: 	    for (i = 0;i < exec->maxRollbacks;i++)
 3562: 		if (exec->rollbacks[i].counts != NULL)
 3563: 		    xmlFree(exec->rollbacks[i].counts);
 3564: 	}
 3565: 	xmlFree(exec->rollbacks);
 3566:     }
 3567:     if (exec->counts != NULL)
 3568: 	xmlFree(exec->counts);
 3569:     if (exec->inputStack != NULL) {
 3570: 	int i;
 3571: 
 3572: 	for (i = 0;i < exec->inputStackNr;i++) {
 3573: 	    if (exec->inputStack[i].value != NULL)
 3574: 		xmlFree(exec->inputStack[i].value);
 3575: 	}
 3576: 	xmlFree(exec->inputStack);
 3577:     }
 3578:     if (exec->errString != NULL)
 3579:         xmlFree(exec->errString);
 3580:     xmlFree(exec);
 3581: }
 3582: 
 3583: static void
 3584: xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 3585: 	                    void *data) {
 3586: #ifdef DEBUG_PUSH
 3587:     printf("saving value: %d:%s\n", exec->inputStackNr, value);
 3588: #endif
 3589:     if (exec->inputStackMax == 0) {
 3590: 	exec->inputStackMax = 4;
 3591: 	exec->inputStack = (xmlRegInputTokenPtr) 
 3592: 	    xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
 3593: 	if (exec->inputStack == NULL) {
 3594: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3595: 	    exec->inputStackMax = 0;
 3596: 	    return;
 3597: 	}
 3598:     } else if (exec->inputStackNr + 1 >= exec->inputStackMax) {
 3599: 	xmlRegInputTokenPtr tmp;
 3600: 
 3601: 	exec->inputStackMax *= 2;
 3602: 	tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
 3603: 			exec->inputStackMax * sizeof(xmlRegInputToken));
 3604: 	if (tmp == NULL) {
 3605: 	    xmlRegexpErrMemory(NULL, "pushing input string");
 3606: 	    exec->inputStackMax /= 2;
 3607: 	    return;
 3608: 	}
 3609: 	exec->inputStack = tmp;
 3610:     }
 3611:     exec->inputStack[exec->inputStackNr].value = xmlStrdup(value);
 3612:     exec->inputStack[exec->inputStackNr].data = data;
 3613:     exec->inputStackNr++;
 3614:     exec->inputStack[exec->inputStackNr].value = NULL;
 3615:     exec->inputStack[exec->inputStackNr].data = NULL;
 3616: }
 3617: 
 3618: /**
 3619:  * xmlRegStrEqualWildcard:
 3620:  * @expStr:  the string to be evaluated 
 3621:  * @valStr:  the validation string
 3622:  *
 3623:  * Checks if both strings are equal or have the same content. "*"
 3624:  * can be used as a wildcard in @valStr; "|" is used as a seperator of 
 3625:  * substrings in both @expStr and @valStr.
 3626:  *
 3627:  * Returns 1 if the comparison is satisfied and the number of substrings
 3628:  * is equal, 0 otherwise.
 3629:  */
 3630: 
 3631: static int
 3632: xmlRegStrEqualWildcard(const xmlChar *expStr, const xmlChar *valStr) {
 3633:     if (expStr == valStr) return(1);
 3634:     if (expStr == NULL) return(0);
 3635:     if (valStr == NULL) return(0);
 3636:     do {
 3637: 	/*
 3638: 	* Eval if we have a wildcard for the current item.
 3639: 	*/
 3640:         if (*expStr != *valStr) {
 3641: 	    /* if one of them starts with a wildcard make valStr be it */
 3642: 	    if (*valStr == '*') {
 3643: 	        const xmlChar *tmp;
 3644: 
 3645: 		tmp = valStr;
 3646: 		valStr = expStr;
 3647: 		expStr = tmp;
 3648: 	    }
 3649: 	    if ((*valStr != 0) && (*expStr != 0) && (*expStr++ == '*')) {
 3650: 		do {
 3651: 		    if (*valStr == XML_REG_STRING_SEPARATOR)
 3652: 			break;
 3653: 		    valStr++;
 3654: 		} while (*valStr != 0);
 3655: 		continue;
 3656: 	    } else
 3657: 		return(0);
 3658: 	}
 3659: 	expStr++;
 3660: 	valStr++;
 3661:     } while (*valStr != 0);
 3662:     if (*expStr != 0)
 3663: 	return (0);
 3664:     else
 3665: 	return (1);
 3666: }
 3667: 
 3668: /**
 3669:  * xmlRegCompactPushString:
 3670:  * @exec: a regexp execution context
 3671:  * @comp:  the precompiled exec with a compact table
 3672:  * @value: a string token input
 3673:  * @data: data associated to the token to reuse in callbacks
 3674:  *
 3675:  * Push one input token in the execution context
 3676:  *
 3677:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 3678:  *     a negative value in case of error.
 3679:  */
 3680: static int
 3681: xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
 3682: 	                xmlRegexpPtr comp,
 3683: 	                const xmlChar *value,
 3684: 	                void *data) {
 3685:     int state = exec->index;
 3686:     int i, target;
 3687: 
 3688:     if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
 3689: 	return(-1);
 3690:     
 3691:     if (value == NULL) {
 3692: 	/*
 3693: 	 * are we at a final state ?
 3694: 	 */
 3695: 	if (comp->compact[state * (comp->nbstrings + 1)] ==
 3696:             XML_REGEXP_FINAL_STATE)
 3697: 	    return(1);
 3698: 	return(0);
 3699:     }
 3700: 
 3701: #ifdef DEBUG_PUSH
 3702:     printf("value pushed: %s\n", value);
 3703: #endif
 3704: 
 3705:     /*
 3706:      * Examine all outside transitions from current state
 3707:      */
 3708:     for (i = 0;i < comp->nbstrings;i++) {
 3709: 	target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 3710: 	if ((target > 0) && (target <= comp->nbstates)) {
 3711: 	    target--; /* to avoid 0 */    
 3712: 	    if (xmlRegStrEqualWildcard(comp->stringMap[i], value)) {
 3713: 		exec->index = target;		
 3714: 		if ((exec->callback != NULL) && (comp->transdata != NULL)) {
 3715: 		    exec->callback(exec->data, value,
 3716: 			  comp->transdata[state * comp->nbstrings + i], data);
 3717: 		}
 3718: #ifdef DEBUG_PUSH
 3719: 		printf("entering state %d\n", target);
 3720: #endif
 3721: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3722: 		    XML_REGEXP_SINK_STATE)
 3723: 		    goto error;
 3724: 
 3725: 		if (comp->compact[target * (comp->nbstrings + 1)] ==
 3726: 		    XML_REGEXP_FINAL_STATE)
 3727: 		    return(1);
 3728: 		return(0);
 3729: 	    }
 3730: 	}
 3731:     }
 3732:     /*
 3733:      * Failed to find an exit transition out from current state for the
 3734:      * current token
 3735:      */
 3736: #ifdef DEBUG_PUSH
 3737:     printf("failed to find a transition for %s on state %d\n", value, state);
 3738: #endif
 3739: error:
 3740:     if (exec->errString != NULL)
 3741:         xmlFree(exec->errString);
 3742:     exec->errString = xmlStrdup(value);
 3743:     exec->errStateNo = state;
 3744:     exec->status = -1;
 3745: #ifdef DEBUG_ERR
 3746:     testerr(exec);
 3747: #endif
 3748:     return(-1);
 3749: }
 3750: 
/**
 * xmlRegExecPushStringInternal:
 * @exec: a regexp execution context
 * @value: a string token input, or NULL to indicate the end of the input
 * @data: data associated to the token to reuse in callbacks
 * @compound: value was assembled from 2 strings
 *
 * Push one input token in the execution context. Regexps carrying a
 * compact table are delegated to xmlRegCompactPushString(); otherwise the
 * transitions of the current state are tried in order, saving rollback
 * points and the pushed tokens so that another path can be tried later
 * when the current one fails.
 *
 * A non-zero exec->status left by a previous call is returned as is
 * without processing @value.
 *
 * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 *     a negative value in case of error.
 */
static int
xmlRegExecPushStringInternal(xmlRegExecCtxtPtr exec, const xmlChar *value,
	                     void *data, int compound) {
    xmlRegTransPtr trans;
    xmlRegAtomPtr atom;
    int ret;
    int final = 0;
    /* set while no rollback happened since the last successful step */
    int progress = 1;

    if (exec == NULL)
	return(-1);
    if (exec->comp == NULL)
	return(-1);
    if (exec->status != 0)
	return(exec->status);

    if (exec->comp->compact != NULL)
	return(xmlRegCompactPushString(exec, exec->comp, value, data));

    /*
     * End of input: done if we already sit on a final state, otherwise
     * remember it so the loop below keeps looking for a way to one.
     */
    if (value == NULL) {
        if (exec->state->type == XML_REGEXP_FINAL_STATE)
	    return(1);
	final = 1;
    }

#ifdef DEBUG_PUSH
    printf("value pushed: %s\n", value);
#endif
    /*
     * If we have an active rollback stack push the new value there
     * and get back to where we were left
     */
    if ((value != NULL) && (exec->inputStackNr > 0)) {
	xmlFARegExecSaveInputString(exec, value, data);
	value = exec->inputStack[exec->index].value;
	data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
	printf("value loaded: %s\n", value);
#endif
    }

    while ((exec->status == 0) &&
	   ((value != NULL) ||
	    ((final == 1) &&
	     (exec->state->type != XML_REGEXP_FINAL_STATE)))) {

	/*
	 * End of input on non-terminal state, rollback, however we may
	 * still have epsilon like transition for counted transitions
	 * on counters, in that case don't break too early.
	 */
	if ((value == NULL) && (exec->counts == NULL))
	    goto rollback;

	exec->transcount = 0;
	/* try the remaining transitions of the current state in order */
	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
	    trans = &exec->state->trans[exec->transno];
	    if (trans->to < 0)
		continue;
	    atom = trans->atom;
	    ret = 0;
	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
		int i;
		int count;
		xmlRegTransPtr t;
		xmlRegCounterPtr counter;

		ret = 0;

#ifdef DEBUG_PUSH
		printf("testing all lax %d\n", trans->count);
#endif
		/*
		 * Check all counted transitions from the current state
		 */
		if ((value == NULL) && (final)) {
		    ret = 1;
		} else if (value != NULL) {
		    for (i = 0;i < exec->state->nbTrans;i++) {
			t = &exec->state->trans[i];
			if ((t->counter < 0) || (t == trans))
			    continue;
			counter = &exec->comp->counters[t->counter];
			count = exec->counts[t->counter];
			if ((count < counter->max) && 
		            (t->atom != NULL) &&
			    (xmlStrEqual(value, t->atom->valuep))) {
			    ret = 0;
			    break;
			}
			/*
			 * NOTE(review): every condition below is also part
			 * of the test above, so this branch looks
			 * unreachable - confirm the intent.
			 */
			if ((count >= counter->min) &&
			    (count < counter->max) &&
			    (t->atom != NULL) &&
			    (xmlStrEqual(value, t->atom->valuep))) {
			    ret = 1;
			    break;
			}
		    }
		}
	    } else if (trans->count == REGEXP_ALL_COUNTER) {
		int i;
		int count;
		xmlRegTransPtr t;
		xmlRegCounterPtr counter;

		ret = 1;

#ifdef DEBUG_PUSH
		printf("testing all %d\n", trans->count);
#endif
		/*
		 * Check all counted transitions from the current state:
		 * the transition is refused as soon as one counter is
		 * outside of its [min, max] range.
		 */
		for (i = 0;i < exec->state->nbTrans;i++) {
                    t = &exec->state->trans[i];
		    if ((t->counter < 0) || (t == trans))
			continue;
                    counter = &exec->comp->counters[t->counter];
		    count = exec->counts[t->counter];
		    if ((count < counter->min) || (count > counter->max)) {
			ret = 0;
			break;
		    }
		}
	    } else if (trans->count >= 0) {
		int count;
		xmlRegCounterPtr counter;

		/*
		 * A counted transition: accepted when the counter value
		 * lies within [min, max].
		 */

		count = exec->counts[trans->count];
		counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_PUSH
		printf("testing count %d: val %d, min %d, max %d\n",
		       trans->count, count, counter->min,  counter->max);
#endif
		ret = ((count >= counter->min) && (count <= counter->max));
	    } else if (atom == NULL) {
		fprintf(stderr, "epsilon transition left at runtime\n");
		exec->status = -2;
		break;
	    } else if (value != NULL) {
		ret = xmlRegStrEqualWildcard(atom->valuep, value);
		/*
		 * a negated atom matches when the strings differ, but
		 * only for tokens flagged as compound
		 */
		if (atom->neg) {
		    ret = !ret;
		    if (!compound)
		        ret = 0;
		}
		/* refuse the match once the associated counter is full */
		if ((ret == 1) && (trans->counter >= 0)) {
		    xmlRegCounterPtr counter;
		    int count;

		    count = exec->counts[trans->counter];
		    counter = &exec->comp->counters[trans->counter];
		    if (count >= counter->max)
			ret = 0;
		}

		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
		    xmlRegStatePtr to = exec->comp->states[trans->to];

		    /*
		     * this is a multiple input sequence
		     */
		    if (exec->state->nbTrans > exec->transno + 1) {
			if (exec->inputStackNr <= 0) {
			    xmlFARegExecSaveInputString(exec, value, data);
			}
			xmlFARegExecSave(exec);
		    }
		    exec->transcount = 1;
		    do {
			/*
			 * Try to progress as much as possible on the input
			 */
			if (exec->transcount == atom->max) {
			    break;
			}
			exec->index++;
			value = exec->inputStack[exec->index].value;
			data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
			printf("value loaded: %s\n", value);
#endif

			/*
			 * End of input: stop here
			 */
			if (value == NULL) {
			    exec->index --;
			    break;
			}
			if (exec->transcount >= atom->min) {
			    int transno = exec->transno;
			    xmlRegStatePtr state = exec->state;

			    /*
			     * The transition is acceptable save it:
			     * the rollback point is recorded as if we
			     * were already in the target state, then the
			     * current position is restored.
			     */
			    exec->transno = -1; /* trick */
			    exec->state = to;
			    if (exec->inputStackNr <= 0) {
				xmlFARegExecSaveInputString(exec, value, data);
			    }
			    xmlFARegExecSave(exec);
			    exec->transno = transno;
			    exec->state = state;
			}
			ret = xmlStrEqual(value, atom->valuep);
			exec->transcount++;
		    } while (ret == 1);
		    if (exec->transcount < atom->min)
			ret = 0;

		    /*
		     * If the last check failed but one transition was found
		     * possible, rollback
		     */
		    if (ret < 0)
			ret = 0;
		    if (ret == 0) {
			goto rollback;
		    }
		}
	    }
	    if (ret == 1) {
		/* the transition is taken: notify, save and update counters */
		if ((exec->callback != NULL) && (atom != NULL) &&
			(data != NULL)) {
		    exec->callback(exec->data, atom->valuep,
			           atom->data, data);
		}
		/* other transitions remain: keep a rollback point */
		if (exec->state->nbTrans > exec->transno + 1) {
		    if (exec->inputStackNr <= 0) {
			xmlFARegExecSaveInputString(exec, value, data);
		    }
		    xmlFARegExecSave(exec);
		}
		if (trans->counter >= 0) {
#ifdef DEBUG_PUSH
		    printf("Increasing count %d\n", trans->counter);
#endif
		    exec->counts[trans->counter]++;
		}
		if ((trans->count >= 0) &&
		    (trans->count < REGEXP_ALL_COUNTER)) {
#ifdef DEBUG_REGEXP_EXEC
		    printf("resetting count %d on transition\n",
		           trans->count);
#endif
		    exec->counts[trans->count] = 0;
		}
#ifdef DEBUG_PUSH
		printf("entering state %d\n", trans->to);
#endif
                if ((exec->comp->states[trans->to] != NULL) &&
		    (exec->comp->states[trans->to]->type ==
		     XML_REGEXP_SINK_STATE)) {
		    /*
		     * entering a sink state, save the current state as error
		     * state.
		     */
		    if (exec->errString != NULL)
			xmlFree(exec->errString);
		    exec->errString = xmlStrdup(value);
		    exec->errState = exec->state;
		    memcpy(exec->errCounts, exec->counts,
			   exec->comp->nbCounters * sizeof(int));
		}
		exec->state = exec->comp->states[trans->to];
		exec->transno = 0;
		/*
		 * the transition carried an atom: move to the next saved
		 * token if there is one, otherwise the input is exhausted
		 */
		if (trans->atom != NULL) {
		    if (exec->inputStack != NULL) {
			exec->index++;
			if (exec->index < exec->inputStackNr) {
			    value = exec->inputStack[exec->index].value;
			    data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
			    printf("value loaded: %s\n", value);
#endif
			} else {
			    value = NULL;
			    data = NULL;
#ifdef DEBUG_PUSH
			    printf("end of input\n");
#endif
			}
		    } else {
			value = NULL;
			data = NULL;
#ifdef DEBUG_PUSH
			printf("end of input\n");
#endif
		    }
		}
		goto progress;
	    } else if (ret < 0) {
		exec->status = -4;
		break;
	    }
	}
	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
            /*
	     * if we didn't yet rollback on the current input
	     * store the current state as the error state.
	     */
	    if ((progress) && (exec->state != NULL) &&
	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
	        progress = 0;
		if (exec->errString != NULL)
		    xmlFree(exec->errString);
		exec->errString = xmlStrdup(value);
		exec->errState = exec->state;
		memcpy(exec->errCounts, exec->counts,
		       exec->comp->nbCounters * sizeof(int));
	    }

	    /*
	     * Failed to find a way out
	     */
	    exec->determinist = 0;
	    xmlFARegExecRollBack(exec);
	    /* if the rollback left no error, reload the token to retry */
	    if (exec->status == 0) {
		value = exec->inputStack[exec->index].value;
		data = exec->inputStack[exec->index].data;
#ifdef DEBUG_PUSH
		printf("value loaded: %s\n", value);
#endif
	    }
	}
	continue;
progress:
        progress = 1;
	continue;
    }
    if (exec->status == 0) {
        return(exec->state->type == XML_REGEXP_FINAL_STATE);
    }
#ifdef DEBUG_ERR
    if (exec->status < 0) {
	testerr(exec);
    }
#endif
    return(exec->status);
}
 4110: 
 4111: /**
 4112:  * xmlRegExecPushString:
 4113:  * @exec: a regexp execution context or NULL to indicate the end
 4114:  * @value: a string token input
 4115:  * @data: data associated to the token to reuse in callbacks
 4116:  *
 4117:  * Push one input token in the execution context
 4118:  *
 4119:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4120:  *     a negative value in case of error.
 4121:  */
 4122: int
 4123: xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4124: 	             void *data) {
 4125:     return(xmlRegExecPushStringInternal(exec, value, data, 0));
 4126: }
 4127: 
 4128: /**
 4129:  * xmlRegExecPushString2:
 4130:  * @exec: a regexp execution context or NULL to indicate the end
 4131:  * @value: the first string token input
 4132:  * @value2: the second string token input
 4133:  * @data: data associated to the token to reuse in callbacks
 4134:  *
 4135:  * Push one input token in the execution context
 4136:  *
 4137:  * Returns: 1 if the regexp reached a final state, 0 if non-final, and
 4138:  *     a negative value in case of error.
 4139:  */
 4140: int
 4141: xmlRegExecPushString2(xmlRegExecCtxtPtr exec, const xmlChar *value,
 4142:                       const xmlChar *value2, void *data) {
 4143:     xmlChar buf[150];
 4144:     int lenn, lenp, ret;
 4145:     xmlChar *str;
 4146: 
 4147:     if (exec == NULL)
 4148: 	return(-1);
 4149:     if (exec->comp == NULL)
 4150: 	return(-1);
 4151:     if (exec->status != 0)
 4152: 	return(exec->status);
 4153: 
 4154:     if (value2 == NULL)
 4155:         return(xmlRegExecPushString(exec, value, data));
 4156: 
 4157:     lenn = strlen((char *) value2);
 4158:     lenp = strlen((char *) value);
 4159: 
 4160:     if (150 < lenn + lenp + 2) {
 4161: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 4162: 	if (str == NULL) {
 4163: 	    exec->status = -1;
 4164: 	    return(-1);
 4165: 	}
 4166:     } else {
 4167: 	str = buf;
 4168:     }
 4169:     memcpy(&str[0], value, lenp);
 4170:     str[lenp] = XML_REG_STRING_SEPARATOR;
 4171:     memcpy(&str[lenp + 1], value2, lenn);
 4172:     str[lenn + lenp + 1] = 0;
 4173: 
 4174:     if (exec->comp->compact != NULL)
 4175: 	ret = xmlRegCompactPushString(exec, exec->comp, str, data);
 4176:     else
 4177:         ret = xmlRegExecPushStringInternal(exec, str, data, 1);
 4178: 
 4179:     if (str != buf)
 4180:         xmlFree(str);
 4181:     return(ret);
 4182: }
 4183: 
 4184: /**
 4185:  * xmlRegExecGetValues:
 4186:  * @exec: a regexp execution context
 4187:  * @err: error extraction or normal one
 4188:  * @nbval: pointer to the number of accepted values IN/OUT
 4189:  * @nbneg: return number of negative transitions
 4190:  * @values: pointer to the array of acceptable values
 4191:  * @terminal: return value if this was a terminal state
 4192:  *
 4193:  * Extract informations from the regexp execution, internal routine to
 4194:  * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
 4195:  *
 4196:  * Returns: 0 in case of success or -1 in case of error.
 4197:  */
 4198: static int
 4199: xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
 4200:                     int *nbval, int *nbneg,
 4201: 		    xmlChar **values, int *terminal) {
 4202:     int maxval;
 4203:     int nb = 0;
 4204: 
 4205:     if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
 4206:         (values == NULL) || (*nbval <= 0))
 4207:         return(-1);
 4208: 
 4209:     maxval = *nbval;
 4210:     *nbval = 0;
 4211:     *nbneg = 0;
 4212:     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
 4213:         xmlRegexpPtr comp;
 4214: 	int target, i, state;
 4215: 
 4216:         comp = exec->comp;
 4217: 
 4218: 	if (err) {
 4219: 	    if (exec->errStateNo == -1) return(-1);
 4220: 	    state = exec->errStateNo;
 4221: 	} else {
 4222: 	    state = exec->index;
 4223: 	}
 4224: 	if (terminal != NULL) {
 4225: 	    if (comp->compact[state * (comp->nbstrings + 1)] ==
 4226: 	        XML_REGEXP_FINAL_STATE)
 4227: 		*terminal = 1;
 4228: 	    else
 4229: 		*terminal = 0;
 4230: 	}
 4231: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4232: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4233: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4234: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
 4235: 		 XML_REGEXP_SINK_STATE)) {
 4236: 	        values[nb++] = comp->stringMap[i];
 4237: 		(*nbval)++;
 4238: 	    }
 4239: 	}
 4240: 	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 4241: 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
 4242: 	    if ((target > 0) && (target <= comp->nbstates) &&
 4243: 	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
 4244: 		 XML_REGEXP_SINK_STATE)) {
 4245: 	        values[nb++] = comp->stringMap[i];
 4246: 		(*nbneg)++;
 4247: 	    }
 4248: 	}
 4249:     } else {
 4250:         int transno;
 4251: 	xmlRegTransPtr trans;
 4252: 	xmlRegAtomPtr atom;
 4253: 	xmlRegStatePtr state;
 4254: 
 4255: 	if (terminal != NULL) {
 4256: 	    if (exec->state->type == XML_REGEXP_FINAL_STATE)
 4257: 		*terminal = 1;
 4258: 	    else
 4259: 		*terminal = 0;
 4260: 	}
 4261: 
 4262: 	if (err) {
 4263: 	    if (exec->errState == NULL) return(-1);
 4264: 	    state = exec->errState;
 4265: 	} else {
 4266: 	    if (exec->state == NULL) return(-1);
 4267: 	    state = exec->state;
 4268: 	}
 4269: 	for (transno = 0;
 4270: 	     (transno < state->nbTrans) && (nb < maxval);
 4271: 	     transno++) {
 4272: 	    trans = &state->trans[transno];
 4273: 	    if (trans->to < 0)
 4274: 		continue;
 4275: 	    atom = trans->atom;
 4276: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4277: 		continue;
 4278: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4279: 	        /* this should not be reached but ... */
 4280: 	        TODO;
 4281: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4282: 	        /* this should not be reached but ... */
 4283: 	        TODO;
 4284: 	    } else if (trans->counter >= 0) {
 4285: 		xmlRegCounterPtr counter = NULL;
 4286: 		int count;
 4287: 
 4288: 		if (err)
 4289: 		    count = exec->errCounts[trans->counter];
 4290: 		else
 4291: 		    count = exec->counts[trans->counter];
 4292: 		if (exec->comp != NULL)
 4293: 		    counter = &exec->comp->counters[trans->counter];
 4294: 		if ((counter == NULL) || (count < counter->max)) {
 4295: 		    if (atom->neg)
 4296: 			values[nb++] = (xmlChar *) atom->valuep2;
 4297: 		    else
 4298: 			values[nb++] = (xmlChar *) atom->valuep;
 4299: 		    (*nbval)++;
 4300: 		}
 4301: 	    } else {
 4302:                 if ((exec->comp->states[trans->to] != NULL) &&
 4303: 		    (exec->comp->states[trans->to]->type !=
 4304: 		     XML_REGEXP_SINK_STATE)) {
 4305: 		    if (atom->neg)
 4306: 			values[nb++] = (xmlChar *) atom->valuep2;
 4307: 		    else
 4308: 			values[nb++] = (xmlChar *) atom->valuep;
 4309: 		    (*nbval)++;
 4310: 		}
 4311: 	    } 
 4312: 	}
 4313: 	for (transno = 0;
 4314: 	     (transno < state->nbTrans) && (nb < maxval);
 4315: 	     transno++) {
 4316: 	    trans = &state->trans[transno];
 4317: 	    if (trans->to < 0)
 4318: 		continue;
 4319: 	    atom = trans->atom;
 4320: 	    if ((atom == NULL) || (atom->valuep == NULL))
 4321: 		continue;
 4322: 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
 4323: 	        continue;
 4324: 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
 4325: 	        continue;
 4326: 	    } else if (trans->counter >= 0) {
 4327: 	        continue;
 4328: 	    } else {
 4329:                 if ((exec->comp->states[trans->to] != NULL) &&
 4330: 		    (exec->comp->states[trans->to]->type ==
 4331: 		     XML_REGEXP_SINK_STATE)) {
 4332: 		    if (atom->neg)
 4333: 			values[nb++] = (xmlChar *) atom->valuep2;
 4334: 		    else
 4335: 			values[nb++] = (xmlChar *) atom->valuep;
 4336: 		    (*nbneg)++;
 4337: 		}
 4338: 	    } 
 4339: 	}
 4340:     }
 4341:     return(0);
 4342: }
 4343: 
 4344: /**
 4345:  * xmlRegExecNextValues:
 4346:  * @exec: a regexp execution context
 4347:  * @nbval: pointer to the number of accepted values IN/OUT
 4348:  * @nbneg: return number of negative transitions
 4349:  * @values: pointer to the array of acceptable values
 4350:  * @terminal: return value if this was a terminal state
 4351:  *
 4352:  * Extract informations from the regexp execution,
 4353:  * the parameter @values must point to an array of @nbval string pointers
 4354:  * on return nbval will contain the number of possible strings in that
 4355:  * state and the @values array will be updated with them. The string values
 4356:  * returned will be freed with the @exec context and don't need to be
 4357:  * deallocated.
 4358:  *
 4359:  * Returns: 0 in case of success or -1 in case of error.
 4360:  */
 4361: int
 4362: xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
 4363:                      xmlChar **values, int *terminal) {
 4364:     return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
 4365: }
 4366: 
 4367: /**
 4368:  * xmlRegExecErrInfo:
 4369:  * @exec: a regexp execution context generating an error
 4370:  * @string: return value for the error string
 4371:  * @nbval: pointer to the number of accepted values IN/OUT
 4372:  * @nbneg: return number of negative transitions
 4373:  * @values: pointer to the array of acceptable values
 4374:  * @terminal: return value if this was a terminal state
 4375:  *
 4376:  * Extract error informations from the regexp execution, the parameter
 4377:  * @string will be updated with the value pushed and not accepted,
 4378:  * the parameter @values must point to an array of @nbval string pointers
 4379:  * on return nbval will contain the number of possible strings in that
 4380:  * state and the @values array will be updated with them. The string values
 4381:  * returned will be freed with the @exec context and don't need to be
 4382:  * deallocated.
 4383:  *
 4384:  * Returns: 0 in case of success or -1 in case of error.
 4385:  */
 4386: int
 4387: xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
 4388:                   int *nbval, int *nbneg, xmlChar **values, int *terminal) {
 4389:     if (exec == NULL)
 4390:         return(-1);
 4391:     if (string != NULL) {
 4392:         if (exec->status != 0)
 4393: 	    *string = exec->errString;
 4394: 	else
 4395: 	    *string = NULL;
 4396:     }
 4397:     return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
 4398: }
 4399: 
 4400: #ifdef DEBUG_ERR
 4401: static void testerr(xmlRegExecCtxtPtr exec) {
 4402:     const xmlChar *string;
 4403:     xmlChar *values[5];
 4404:     int nb = 5;
 4405:     int nbneg;
 4406:     int terminal;
 4407:     xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
 4408: }
 4409: #endif
 4410: 
 4411: #if 0
/*
 * xmlRegExecPushChar:
 * @exec: a regexp execution context
 * @UCS: not referenced by the body below
 *
 * This routine is compiled out by the surrounding #if 0. It is a
 * character based variant of the push loop: it walks exec->inputString
 * from exec->index, trying the transitions of the current state and
 * rolling back on failure.
 *
 * NOTE(review): the function is declared as returning int but has no
 * return statement after the main loop; fix this before enabling it.
 */
static int
xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) {
    xmlRegTransPtr trans;
    xmlRegAtomPtr atom;
    int ret;
    int codepoint, len;

    if (exec == NULL)
	return(-1);
    if (exec->status != 0)
	return(exec->status);

    while ((exec->status == 0) &&
	   ((exec->inputString[exec->index] != 0) ||
	    (exec->state->type != XML_REGEXP_FINAL_STATE))) {

	/*
	 * End of input on non-terminal state, rollback, however we may
	 * still have epsilon like transition for counted transitions
	 * on counters, in that case don't break too early.
	 */
	if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
	    goto rollback;

	exec->transcount = 0;
	/* try the remaining transitions of the current state in order */
	for (;exec->transno < exec->state->nbTrans;exec->transno++) {
	    trans = &exec->state->trans[exec->transno];
	    if (trans->to < 0)
		continue;
	    atom = trans->atom;
	    ret = 0;
	    if (trans->count >= 0) {
		int count;
		xmlRegCounterPtr counter;

		/*
		 * A counted transition.
		 */

		count = exec->counts[trans->count];
		counter = &exec->comp->counters[trans->count];
#ifdef DEBUG_REGEXP_EXEC
		printf("testing count %d: val %d, min %d, max %d\n",
		       trans->count, count, counter->min,  counter->max);
#endif
		ret = ((count >= counter->min) && (count <= counter->max));
	    } else if (atom == NULL) {
		fprintf(stderr, "epsilon transition left at runtime\n");
		exec->status = -2;
		break;
	    } else if (exec->inputString[exec->index] != 0) {
                codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
		ret = xmlRegCheckCharacter(atom, codepoint);
		if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
		    xmlRegStatePtr to = exec->comp->states[trans->to];

		    /*
		     * this is a multiple input sequence
		     */
		    if (exec->state->nbTrans > exec->transno + 1) {
			xmlFARegExecSave(exec);
		    }
		    exec->transcount = 1;
		    do {
			/*
			 * Try to progress as much as possible on the input
			 */
			if (exec->transcount == atom->max) {
			    break;
			}
			exec->index += len;
			/*
			 * End of input: stop here
			 */
			if (exec->inputString[exec->index] == 0) {
			    exec->index -= len;
			    break;
			}
			if (exec->transcount >= atom->min) {
			    int transno = exec->transno;
			    xmlRegStatePtr state = exec->state;

			    /*
			     * The transition is acceptable save it
			     */
			    exec->transno = -1; /* trick */
			    exec->state = to;
			    xmlFARegExecSave(exec);
			    exec->transno = transno;
			    exec->state = state;
			}
			codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
				              len);
			ret = xmlRegCheckCharacter(atom, codepoint);
			exec->transcount++;
		    } while (ret == 1);
		    if (exec->transcount < atom->min)
			ret = 0;

		    /*
		     * If the last check failed but one transition was found
		     * possible, rollback
		     */
		    if (ret < 0)
			ret = 0;
		    if (ret == 0) {
			goto rollback;
		    }
		}
	    }
	    if (ret == 1) {
		/* other transitions remain: keep a rollback point */
		if (exec->state->nbTrans > exec->transno + 1) {
		    xmlFARegExecSave(exec);
		}
		/*
		 * restart count for expressions like this ((abc){2})*
		 */
		if (trans->count >= 0) {
#ifdef DEBUG_REGEXP_EXEC
		    printf("Reset count %d\n", trans->count);
#endif
		    exec->counts[trans->count] = 0;
		}
		if (trans->counter >= 0) {
#ifdef DEBUG_REGEXP_EXEC
		    printf("Increasing count %d\n", trans->counter);
#endif
		    exec->counts[trans->counter]++;
		}
#ifdef DEBUG_REGEXP_EXEC
		printf("entering state %d\n", trans->to);
#endif
		exec->state = exec->comp->states[trans->to];
		exec->transno = 0;
		if (trans->atom != NULL) {
		    exec->index += len;
		}
		goto progress;
	    } else if (ret < 0) {
		exec->status = -4;
		break;
	    }
	}
	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
rollback:
	    /*
	     * Failed to find a way out
	     */
	    exec->determinist = 0;
	    xmlFARegExecRollBack(exec);
	}
progress:
	continue;
    }
}
 4567: #endif
 4568: /************************************************************************
 4569:  * 									*
 4570:  *	Parser for the Schemas Datatype Regular Expressions		*
 4571:  *	http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs	*
 4572:  * 									*
 4573:  ************************************************************************/
 4574: 
 4575: /**
 4576:  * xmlFAIsChar:
 4577:  * @ctxt:  a regexp parser context
 4578:  *
 4579:  * [10]   Char   ::=   [^.\?*+()|#x5B#x5D]
 4580:  */
 4581: static int
 4582: xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
 4583:     int cur;
 4584:     int len;
 4585: 
 4586:     cur = CUR_SCHAR(ctxt->cur, len);
 4587:     if ((cur == '.') || (cur == '\\') || (cur == '?') ||
 4588: 	(cur == '*') || (cur == '+') || (cur == '(') ||
 4589: 	(cur == ')') || (cur == '|') || (cur == 0x5B) ||
 4590: 	(cur == 0x5D) || (cur == 0))
 4591: 	return(-1);
 4592:     return(cur);
 4593: }
 4594: 
 4595: /**
 4596:  * xmlFAParseCharProp:
 4597:  * @ctxt:  a regexp parser context
 4598:  *
 4599:  * [27]   charProp   ::=   IsCategory | IsBlock
 4600:  * [28]   IsCategory ::= Letters | Marks | Numbers | Punctuation |
 4601:  *                       Separators | Symbols | Others 
 4602:  * [29]   Letters   ::=   'L' [ultmo]?
 4603:  * [30]   Marks   ::=   'M' [nce]?
 4604:  * [31]   Numbers   ::=   'N' [dlo]?
 4605:  * [32]   Punctuation   ::=   'P' [cdseifo]?
 4606:  * [33]   Separators   ::=   'Z' [slp]?
 4607:  * [34]   Symbols   ::=   'S' [mcko]?
 4608:  * [35]   Others   ::=   'C' [cfon]?
 4609:  * [36]   IsBlock   ::=   'Is' [a-zA-Z0-9#x2D]+
 4610:  */
 4611: static void
 4612: xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
 4613:     int cur;
 4614:     xmlRegAtomType type = (xmlRegAtomType) 0;
 4615:     xmlChar *blockName = NULL;
 4616:     
 4617:     cur = CUR;
 4618:     if (cur == 'L') {
 4619: 	NEXT;
 4620: 	cur = CUR;
 4621: 	if (cur == 'u') {
 4622: 	    NEXT;
 4623: 	    type = XML_REGEXP_LETTER_UPPERCASE;
 4624: 	} else if (cur == 'l') {
 4625: 	    NEXT;
 4626: 	    type = XML_REGEXP_LETTER_LOWERCASE;
 4627: 	} else if (cur == 't') {
 4628: 	    NEXT;
 4629: 	    type = XML_REGEXP_LETTER_TITLECASE;
 4630: 	} else if (cur == 'm') {
 4631: 	    NEXT;
 4632: 	    type = XML_REGEXP_LETTER_MODIFIER;
 4633: 	} else if (cur == 'o') {
 4634: 	    NEXT;
 4635: 	    type = XML_REGEXP_LETTER_OTHERS;
 4636: 	} else {
 4637: 	    type = XML_REGEXP_LETTER;
 4638: 	}
 4639:     } else if (cur == 'M') {
 4640: 	NEXT;
 4641: 	cur = CUR;
 4642: 	if (cur == 'n') {
 4643: 	    NEXT;
 4644: 	    /* nonspacing */
 4645: 	    type = XML_REGEXP_MARK_NONSPACING;
 4646: 	} else if (cur == 'c') {
 4647: 	    NEXT;
 4648: 	    /* spacing combining */
 4649: 	    type = XML_REGEXP_MARK_SPACECOMBINING;
 4650: 	} else if (cur == 'e') {
 4651: 	    NEXT;
 4652: 	    /* enclosing */
 4653: 	    type = XML_REGEXP_MARK_ENCLOSING;
 4654: 	} else {
 4655: 	    /* all marks */
 4656: 	    type = XML_REGEXP_MARK;
 4657: 	}
 4658:     } else if (cur == 'N') {
 4659: 	NEXT;
 4660: 	cur = CUR;
 4661: 	if (cur == 'd') {
 4662: 	    NEXT;
 4663: 	    /* digital */
 4664: 	    type = XML_REGEXP_NUMBER_DECIMAL;
 4665: 	} else if (cur == 'l') {
 4666: 	    NEXT;
 4667: 	    /* letter */
 4668: 	    type = XML_REGEXP_NUMBER_LETTER;
 4669: 	} else if (cur == 'o') {
 4670: 	    NEXT;
 4671: 	    /* other */
 4672: 	    type = XML_REGEXP_NUMBER_OTHERS;
 4673: 	} else {
 4674: 	    /* all numbers */
 4675: 	    type = XML_REGEXP_NUMBER;
 4676: 	}
 4677:     } else if (cur == 'P') {
 4678: 	NEXT;
 4679: 	cur = CUR;
 4680: 	if (cur == 'c') {
 4681: 	    NEXT;
 4682: 	    /* connector */
 4683: 	    type = XML_REGEXP_PUNCT_CONNECTOR;
 4684: 	} else if (cur == 'd') {
 4685: 	    NEXT;
 4686: 	    /* dash */
 4687: 	    type = XML_REGEXP_PUNCT_DASH;
 4688: 	} else if (cur == 's') {
 4689: 	    NEXT;
 4690: 	    /* open */
 4691: 	    type = XML_REGEXP_PUNCT_OPEN;
 4692: 	} else if (cur == 'e') {
 4693: 	    NEXT;
 4694: 	    /* close */
 4695: 	    type = XML_REGEXP_PUNCT_CLOSE;
 4696: 	} else if (cur == 'i') {
 4697: 	    NEXT;
 4698: 	    /* initial quote */
 4699: 	    type = XML_REGEXP_PUNCT_INITQUOTE;
 4700: 	} else if (cur == 'f') {
 4701: 	    NEXT;
 4702: 	    /* final quote */
 4703: 	    type = XML_REGEXP_PUNCT_FINQUOTE;
 4704: 	} else if (cur == 'o') {
 4705: 	    NEXT;
 4706: 	    /* other */
 4707: 	    type = XML_REGEXP_PUNCT_OTHERS;
 4708: 	} else {
 4709: 	    /* all punctuation */
 4710: 	    type = XML_REGEXP_PUNCT;
 4711: 	}
 4712:     } else if (cur == 'Z') {
 4713: 	NEXT;
 4714: 	cur = CUR;
 4715: 	if (cur == 's') {
 4716: 	    NEXT;
 4717: 	    /* space */
 4718: 	    type = XML_REGEXP_SEPAR_SPACE;
 4719: 	} else if (cur == 'l') {
 4720: 	    NEXT;
 4721: 	    /* line */
 4722: 	    type = XML_REGEXP_SEPAR_LINE;
 4723: 	} else if (cur == 'p') {
 4724: 	    NEXT;
 4725: 	    /* paragraph */
 4726: 	    type = XML_REGEXP_SEPAR_PARA;
 4727: 	} else {
 4728: 	    /* all separators */
 4729: 	    type = XML_REGEXP_SEPAR;
 4730: 	}
 4731:     } else if (cur == 'S') {
 4732: 	NEXT;
 4733: 	cur = CUR;
 4734: 	if (cur == 'm') {
 4735: 	    NEXT;
 4736: 	    type = XML_REGEXP_SYMBOL_MATH;
 4737: 	    /* math */
 4738: 	} else if (cur == 'c') {
 4739: 	    NEXT;
 4740: 	    type = XML_REGEXP_SYMBOL_CURRENCY;
 4741: 	    /* currency */
 4742: 	} else if (cur == 'k') {
 4743: 	    NEXT;
 4744: 	    type = XML_REGEXP_SYMBOL_MODIFIER;
 4745: 	    /* modifiers */
 4746: 	} else if (cur == 'o') {
 4747: 	    NEXT;
 4748: 	    type = XML_REGEXP_SYMBOL_OTHERS;
 4749: 	    /* other */
 4750: 	} else {
 4751: 	    /* all symbols */
 4752: 	    type = XML_REGEXP_SYMBOL;
 4753: 	}
 4754:     } else if (cur == 'C') {
 4755: 	NEXT;
 4756: 	cur = CUR;
 4757: 	if (cur == 'c') {
 4758: 	    NEXT;
 4759: 	    /* control */
 4760: 	    type = XML_REGEXP_OTHER_CONTROL;
 4761: 	} else if (cur == 'f') {
 4762: 	    NEXT;
 4763: 	    /* format */
 4764: 	    type = XML_REGEXP_OTHER_FORMAT;
 4765: 	} else if (cur == 'o') {
 4766: 	    NEXT;
 4767: 	    /* private use */
 4768: 	    type = XML_REGEXP_OTHER_PRIVATE;
 4769: 	} else if (cur == 'n') {
 4770: 	    NEXT;
 4771: 	    /* not assigned */
 4772: 	    type = XML_REGEXP_OTHER_NA;
 4773: 	} else {
 4774: 	    /* all others */
 4775: 	    type = XML_REGEXP_OTHER;
 4776: 	}
 4777:     } else if (cur == 'I') {
 4778: 	const xmlChar *start;
 4779: 	NEXT;
 4780: 	cur = CUR;
 4781: 	if (cur != 's') {
 4782: 	    ERROR("IsXXXX expected");
 4783: 	    return;
 4784: 	}
 4785: 	NEXT;
 4786: 	start = ctxt->cur;
 4787: 	cur = CUR;
 4788: 	if (((cur >= 'a') && (cur <= 'z')) || 
 4789: 	    ((cur >= 'A') && (cur <= 'Z')) || 
 4790: 	    ((cur >= '0') && (cur <= '9')) || 
 4791: 	    (cur == 0x2D)) {
 4792: 	    NEXT;
 4793: 	    cur = CUR;
 4794: 	    while (((cur >= 'a') && (cur <= 'z')) || 
 4795: 		((cur >= 'A') && (cur <= 'Z')) || 
 4796: 		((cur >= '0') && (cur <= '9')) || 
 4797: 		(cur == 0x2D)) {
 4798: 		NEXT;
 4799: 		cur = CUR;
 4800: 	    }
 4801: 	}
 4802: 	type = XML_REGEXP_BLOCK_NAME;
 4803: 	blockName = xmlStrndup(start, ctxt->cur - start);
 4804:     } else {
 4805: 	ERROR("Unknown char property");
 4806: 	return;
 4807:     }
 4808:     if (ctxt->atom == NULL) {
 4809: 	ctxt->atom = xmlRegNewAtom(ctxt, type);
 4810: 	if (ctxt->atom != NULL)
 4811: 	    ctxt->atom->valuep = blockName;
 4812:     } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4813:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4814: 		           type, 0, 0, blockName);
 4815:     }
 4816: }
 4817: 
 4818: /**
 4819:  * xmlFAParseCharClassEsc:
 4820:  * @ctxt:  a regexp parser context
 4821:  *
 4822:  * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc ) 
 4823:  * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
 4824:  * [25] catEsc   ::=   '\p{' charProp '}'
 4825:  * [26] complEsc ::=   '\P{' charProp '}'
 4826:  * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
 4827:  */
 4828: static void
 4829: xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
 4830:     int cur;
 4831: 
 4832:     if (CUR == '.') {
 4833: 	if (ctxt->atom == NULL) {
 4834: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
 4835: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4836: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4837: 			       XML_REGEXP_ANYCHAR, 0, 0, NULL);
 4838: 	}
 4839: 	NEXT;
 4840: 	return;
 4841:     }
 4842:     if (CUR != '\\') {
 4843: 	ERROR("Escaped sequence: expecting \\");
 4844: 	return;
 4845:     }
 4846:     NEXT;
 4847:     cur = CUR;
 4848:     if (cur == 'p') {
 4849: 	NEXT;
 4850: 	if (CUR != '{') {
 4851: 	    ERROR("Expecting '{'");
 4852: 	    return;
 4853: 	}
 4854: 	NEXT;
 4855: 	xmlFAParseCharProp(ctxt);
 4856: 	if (CUR != '}') {
 4857: 	    ERROR("Expecting '}'");
 4858: 	    return;
 4859: 	}
 4860: 	NEXT;
 4861:     } else if (cur == 'P') {
 4862: 	NEXT;
 4863: 	if (CUR != '{') {
 4864: 	    ERROR("Expecting '{'");
 4865: 	    return;
 4866: 	}
 4867: 	NEXT;
 4868: 	xmlFAParseCharProp(ctxt);
 4869: 	ctxt->atom->neg = 1;
 4870: 	if (CUR != '}') {
 4871: 	    ERROR("Expecting '}'");
 4872: 	    return;
 4873: 	}
 4874: 	NEXT;
 4875:     } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
 4876: 	(cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
 4877: 	(cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
 4878: 	(cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
 4879: 	(cur == 0x5E)) {
 4880: 	if (ctxt->atom == NULL) {
 4881: 	    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 4882: 	    if (ctxt->atom != NULL) {
 4883: 	        switch (cur) {
 4884: 		    case 'n':
 4885: 		        ctxt->atom->codepoint = '\n';
 4886: 			break;
 4887: 		    case 'r':
 4888: 		        ctxt->atom->codepoint = '\r';
 4889: 			break;
 4890: 		    case 't':
 4891: 		        ctxt->atom->codepoint = '\t';
 4892: 			break;
 4893: 		    default:
 4894: 			ctxt->atom->codepoint = cur;
 4895: 		}
 4896: 	    }
 4897: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4898:             switch (cur) {
 4899:                 case 'n':
 4900:                     cur = '\n';
 4901:                     break;
 4902:                 case 'r':
 4903:                     cur = '\r';
 4904:                     break;
 4905:                 case 't':
 4906:                     cur = '\t';
 4907:                     break;
 4908:             }
 4909: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4910: 			       XML_REGEXP_CHARVAL, cur, cur, NULL);
 4911: 	}
 4912: 	NEXT;
 4913:     } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
 4914: 	(cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
 4915: 	(cur == 'w') || (cur == 'W')) {
 4916: 	xmlRegAtomType type = XML_REGEXP_ANYSPACE;
 4917: 
 4918: 	switch (cur) {
 4919: 	    case 's': 
 4920: 		type = XML_REGEXP_ANYSPACE;
 4921: 		break;
 4922: 	    case 'S': 
 4923: 		type = XML_REGEXP_NOTSPACE;
 4924: 		break;
 4925: 	    case 'i': 
 4926: 		type = XML_REGEXP_INITNAME;
 4927: 		break;
 4928: 	    case 'I': 
 4929: 		type = XML_REGEXP_NOTINITNAME;
 4930: 		break;
 4931: 	    case 'c': 
 4932: 		type = XML_REGEXP_NAMECHAR;
 4933: 		break;
 4934: 	    case 'C': 
 4935: 		type = XML_REGEXP_NOTNAMECHAR;
 4936: 		break;
 4937: 	    case 'd': 
 4938: 		type = XML_REGEXP_DECIMAL;
 4939: 		break;
 4940: 	    case 'D': 
 4941: 		type = XML_REGEXP_NOTDECIMAL;
 4942: 		break;
 4943: 	    case 'w': 
 4944: 		type = XML_REGEXP_REALCHAR;
 4945: 		break;
 4946: 	    case 'W': 
 4947: 		type = XML_REGEXP_NOTREALCHAR;
 4948: 		break;
 4949: 	}
 4950: 	NEXT;
 4951: 	if (ctxt->atom == NULL) {
 4952: 	    ctxt->atom = xmlRegNewAtom(ctxt, type);
 4953: 	} else if (ctxt->atom->type == XML_REGEXP_RANGES) {
 4954: 	    xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 4955: 			       type, 0, 0, NULL);
 4956: 	}
 4957:     } else {
 4958: 	ERROR("Wrong escape sequence, misuse of character '\\'");
 4959:     }
 4960: }
 4961: 
 4962: /**
 4963:  * xmlFAParseCharRange:
 4964:  * @ctxt:  a regexp parser context
 4965:  *
 4966:  * [17]   charRange   ::=     seRange | XmlCharRef | XmlCharIncDash 
 4967:  * [18]   seRange   ::=   charOrEsc '-' charOrEsc
 4968:  * [20]   charOrEsc   ::=   XmlChar | SingleCharEsc
 4969:  * [21]   XmlChar   ::=   [^\#x2D#x5B#x5D]
 4970:  * [22]   XmlCharIncDash   ::=   [^\#x5B#x5D]
 4971:  */
 4972: static void
 4973: xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
 4974:     int cur, len;
 4975:     int start = -1;
 4976:     int end = -1;
 4977: 
 4978:     if (CUR == '\0') {
 4979:         ERROR("Expecting ']'");
 4980: 	return;
 4981:     }
 4982: 
 4983:     cur = CUR;
 4984:     if (cur == '\\') {
 4985: 	NEXT;
 4986: 	cur = CUR;
 4987: 	switch (cur) {
 4988: 	    case 'n': start = 0xA; break;
 4989: 	    case 'r': start = 0xD; break;
 4990: 	    case 't': start = 0x9; break;
 4991: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 4992: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 4993: 	    case '[': case ']':
 4994: 		start = cur; break;
 4995: 	    default:
 4996: 		ERROR("Invalid escape value");
 4997: 		return;
 4998: 	}
 4999: 	end = start;
 5000:         len = 1;
 5001:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 5002:         end = start = CUR_SCHAR(ctxt->cur, len);
 5003:     } else {
 5004: 	ERROR("Expecting a char range");
 5005: 	return;
 5006:     }
 5007:     /*
 5008:      * Since we are "inside" a range, we can assume ctxt->cur is past
 5009:      * the start of ctxt->string, and PREV should be safe
 5010:      */
 5011:     if ((start == '-') && (NXT(1) != ']') && (PREV != '[') && (PREV != '^')) {
 5012: 	NEXTL(len);
 5013: 	return;
 5014:     }
 5015:     NEXTL(len);
 5016:     cur = CUR;
 5017:     if ((cur != '-') || (NXT(1) == ']')) {
 5018:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5019: 		              XML_REGEXP_CHARVAL, start, end, NULL);
 5020: 	return;
 5021:     }
 5022:     NEXT;
 5023:     cur = CUR;
 5024:     if (cur == '\\') {
 5025: 	NEXT;
 5026: 	cur = CUR;
 5027: 	switch (cur) {
 5028: 	    case 'n': end = 0xA; break;
 5029: 	    case 'r': end = 0xD; break;
 5030: 	    case 't': end = 0x9; break;
 5031: 	    case '\\': case '|': case '.': case '-': case '^': case '?':
 5032: 	    case '*': case '+': case '{': case '}': case '(': case ')':
 5033: 	    case '[': case ']':
 5034: 		end = cur; break;
 5035: 	    default:
 5036: 		ERROR("Invalid escape value");
 5037: 		return;
 5038: 	}
 5039:         len = 1;
 5040:     } else if ((cur != 0x5B) && (cur != 0x5D)) {
 5041:         end = CUR_SCHAR(ctxt->cur, len);
 5042:     } else {
 5043: 	ERROR("Expecting the end of a char range");
 5044: 	return;
 5045:     }
 5046:     NEXTL(len);
 5047:     /* TODO check that the values are acceptable character ranges for XML */
 5048:     if (end < start) {
 5049: 	ERROR("End of range is before start of range");
 5050:     } else {
 5051:         xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
 5052: 		           XML_REGEXP_CHARVAL, start, end, NULL);
 5053:     }
 5054:     return;
 5055: }
 5056: 
 5057: /**
 5058:  * xmlFAParsePosCharGroup:
 5059:  * @ctxt:  a regexp parser context
 5060:  *
 5061:  * [14]   posCharGroup ::= ( charRange | charClassEsc  )+
 5062:  */
 5063: static void
 5064: xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
 5065:     do {
 5066: 	if (CUR == '\\') {
 5067: 	    xmlFAParseCharClassEsc(ctxt);
 5068: 	} else {
 5069: 	    xmlFAParseCharRange(ctxt);
 5070: 	}
 5071:     } while ((CUR != ']') && (CUR != '^') && (CUR != '-') &&
 5072:              (CUR != 0) && (ctxt->error == 0));
 5073: }
 5074: 
 5075: /**
 5076:  * xmlFAParseCharGroup:
 5077:  * @ctxt:  a regexp parser context
 5078:  *
 5079:  * [13]   charGroup    ::= posCharGroup | negCharGroup | charClassSub
 5080:  * [15]   negCharGroup ::= '^' posCharGroup
 5081:  * [16]   charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr  
 5082:  * [12]   charClassExpr ::= '[' charGroup ']'
 5083:  */
 5084: static void
 5085: xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
 5086:     int n = ctxt->neg;
 5087:     while ((CUR != ']') && (ctxt->error == 0)) {
 5088: 	if (CUR == '^') {
 5089: 	    int neg = ctxt->neg;
 5090: 
 5091: 	    NEXT;
 5092: 	    ctxt->neg = !ctxt->neg;
 5093: 	    xmlFAParsePosCharGroup(ctxt);
 5094: 	    ctxt->neg = neg;
 5095: 	} else if ((CUR == '-') && (NXT(1) == '[')) {
 5096: 	    int neg = ctxt->neg;
 5097: 	    ctxt->neg = 2;
 5098: 	    NEXT;	/* eat the '-' */
 5099: 	    NEXT;	/* eat the '[' */
 5100: 	    xmlFAParseCharGroup(ctxt);
 5101: 	    if (CUR == ']') {
 5102: 		NEXT;
 5103: 	    } else {
 5104: 		ERROR("charClassExpr: ']' expected");
 5105: 		break;
 5106: 	    }
 5107: 	    ctxt->neg = neg;
 5108: 	    break;
 5109: 	} else if (CUR != ']') {
 5110: 	    xmlFAParsePosCharGroup(ctxt);
 5111: 	}
 5112:     }
 5113:     ctxt->neg = n;
 5114: }
 5115: 
 5116: /**
 5117:  * xmlFAParseCharClass:
 5118:  * @ctxt:  a regexp parser context
 5119:  *
 5120:  * [11]   charClass   ::=     charClassEsc | charClassExpr
 5121:  * [12]   charClassExpr   ::=   '[' charGroup ']'
 5122:  */
 5123: static void
 5124: xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
 5125:     if (CUR == '[') {
 5126: 	NEXT;
 5127: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
 5128: 	if (ctxt->atom == NULL)
 5129: 	    return;
 5130: 	xmlFAParseCharGroup(ctxt);
 5131: 	if (CUR == ']') {
 5132: 	    NEXT;
 5133: 	} else {
 5134: 	    ERROR("xmlFAParseCharClass: ']' expected");
 5135: 	}
 5136:     } else {
 5137: 	xmlFAParseCharClassEsc(ctxt);
 5138:     }
 5139: }
 5140: 
 5141: /**
 5142:  * xmlFAParseQuantExact:
 5143:  * @ctxt:  a regexp parser context
 5144:  *
 5145:  * [8]   QuantExact   ::=   [0-9]+
 5146:  *
 5147:  * Returns 0 if success or -1 in case of error
 5148:  */
 5149: static int
 5150: xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
 5151:     int ret = 0;
 5152:     int ok = 0;
 5153: 
 5154:     while ((CUR >= '0') && (CUR <= '9')) {
 5155: 	ret = ret * 10 + (CUR - '0');
 5156: 	ok = 1;
 5157: 	NEXT;
 5158:     }
 5159:     if (ok != 1) {
 5160: 	return(-1);
 5161:     }
 5162:     return(ret);
 5163: }
 5164: 
 5165: /**
 5166:  * xmlFAParseQuantifier:
 5167:  * @ctxt:  a regexp parser context
 5168:  *
 5169:  * [4]   quantifier   ::=   [?*+] | ( '{' quantity '}' )
 5170:  * [5]   quantity   ::=   quantRange | quantMin | QuantExact
 5171:  * [6]   quantRange   ::=   QuantExact ',' QuantExact
 5172:  * [7]   quantMin   ::=   QuantExact ','
 5173:  * [8]   QuantExact   ::=   [0-9]+
 5174:  */
 5175: static int
 5176: xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
 5177:     int cur;
 5178: 
 5179:     cur = CUR;
 5180:     if ((cur == '?') || (cur == '*') || (cur == '+')) {
 5181: 	if (ctxt->atom != NULL) {
 5182: 	    if (cur == '?')
 5183: 		ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
 5184: 	    else if (cur == '*')
 5185: 		ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
 5186: 	    else if (cur == '+')
 5187: 		ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
 5188: 	}
 5189: 	NEXT;
 5190: 	return(1);
 5191:     }
 5192:     if (cur == '{') {
 5193: 	int min = 0, max = 0;
 5194: 
 5195: 	NEXT;
 5196: 	cur = xmlFAParseQuantExact(ctxt);
 5197: 	if (cur >= 0)
 5198: 	    min = cur;
 5199: 	if (CUR == ',') {
 5200: 	    NEXT;
 5201: 	    if (CUR == '}')
 5202: 	        max = INT_MAX;
 5203: 	    else {
 5204: 	        cur = xmlFAParseQuantExact(ctxt);
 5205: 	        if (cur >= 0)
 5206: 		    max = cur;
 5207: 		else {
 5208: 		    ERROR("Improper quantifier");
 5209: 		}
 5210: 	    }
 5211: 	}
 5212: 	if (CUR == '}') {
 5213: 	    NEXT;
 5214: 	} else {
 5215: 	    ERROR("Unterminated quantifier");
 5216: 	}
 5217: 	if (max == 0)
 5218: 	    max = min;
 5219: 	if (ctxt->atom != NULL) {
 5220: 	    ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
 5221: 	    ctxt->atom->min = min;
 5222: 	    ctxt->atom->max = max;
 5223: 	}
 5224: 	return(1);
 5225:     }
 5226:     return(0);
 5227: }
 5228: 
 5229: /**
 5230:  * xmlFAParseAtom:
 5231:  * @ctxt:  a regexp parser context
 5232:  *
 5233:  * [9]   atom   ::=   Char | charClass | ( '(' regExp ')' )
 5234:  */
 5235: static int
 5236: xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
 5237:     int codepoint, len;
 5238: 
 5239:     codepoint = xmlFAIsChar(ctxt);
 5240:     if (codepoint > 0) {
 5241: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
 5242: 	if (ctxt->atom == NULL)
 5243: 	    return(-1);
 5244: 	codepoint = CUR_SCHAR(ctxt->cur, len);
 5245: 	ctxt->atom->codepoint = codepoint;
 5246: 	NEXTL(len);
 5247: 	return(1);
 5248:     } else if (CUR == '|') {
 5249: 	return(0);
 5250:     } else if (CUR == 0) {
 5251: 	return(0);
 5252:     } else if (CUR == ')') {
 5253: 	return(0);
 5254:     } else if (CUR == '(') {
 5255: 	xmlRegStatePtr start, oldend, start0;
 5256: 
 5257: 	NEXT;
 5258: 	/*
 5259: 	 * this extra Epsilon transition is needed if we count with 0 allowed
 5260: 	 * unfortunately this can't be known at that point
 5261: 	 */
 5262: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5263: 	start0 = ctxt->state;
 5264: 	xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
 5265: 	start = ctxt->state;
 5266: 	oldend = ctxt->end;
 5267: 	ctxt->end = NULL;
 5268: 	ctxt->atom = NULL;
 5269: 	xmlFAParseRegExp(ctxt, 0);
 5270: 	if (CUR == ')') {
 5271: 	    NEXT;
 5272: 	} else {
 5273: 	    ERROR("xmlFAParseAtom: expecting ')'");
 5274: 	}
 5275: 	ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
 5276: 	if (ctxt->atom == NULL)
 5277: 	    return(-1);
 5278: 	ctxt->atom->start = start;
 5279: 	ctxt->atom->start0 = start0;
 5280: 	ctxt->atom->stop = ctxt->state;
 5281: 	ctxt->end = oldend;
 5282: 	return(1);
 5283:     } else if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
 5284: 	xmlFAParseCharClass(ctxt);
 5285: 	return(1);
 5286:     }
 5287:     return(0);
 5288: }
 5289: 
 5290: /**
 5291:  * xmlFAParsePiece:
 5292:  * @ctxt:  a regexp parser context
 5293:  *
 5294:  * [3]   piece   ::=   atom quantifier?
 5295:  */
 5296: static int
 5297: xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
 5298:     int ret;
 5299: 
 5300:     ctxt->atom = NULL;
 5301:     ret = xmlFAParseAtom(ctxt);
 5302:     if (ret == 0)
 5303: 	return(0);
 5304:     if (ctxt->atom == NULL) {
 5305: 	ERROR("internal: no atom generated");
 5306:     }
 5307:     xmlFAParseQuantifier(ctxt);
 5308:     return(1);
 5309: }
 5310: 
 5311: /**
 5312:  * xmlFAParseBranch:
 5313:  * @ctxt:  a regexp parser context
 5314:  * @to: optional target to the end of the branch
 5315:  *
 5316:  * @to is used to optimize by removing duplicate path in automata
 5317:  * in expressions like (a|b)(c|d)
 5318:  *
 5319:  * [2]   branch   ::=   piece*
 5320:  */
 5321: static int
 5322: xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr to) {
 5323:     xmlRegStatePtr previous;
 5324:     int ret;
 5325: 
 5326:     previous = ctxt->state;
 5327:     ret = xmlFAParsePiece(ctxt);
 5328:     if (ret != 0) {
 5329: 	if (xmlFAGenerateTransitions(ctxt, previous, 
 5330: 	        (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5331: 	    return(-1);
 5332: 	previous = ctxt->state;
 5333: 	ctxt->atom = NULL;
 5334:     }
 5335:     while ((ret != 0) && (ctxt->error == 0)) {
 5336: 	ret = xmlFAParsePiece(ctxt);
 5337: 	if (ret != 0) {
 5338: 	    if (xmlFAGenerateTransitions(ctxt, previous, 
 5339: 	            (CUR=='|' || CUR==')') ? to : NULL, ctxt->atom) < 0)
 5340: 		    return(-1);
 5341: 	    previous = ctxt->state;
 5342: 	    ctxt->atom = NULL;
 5343: 	}
 5344:     }
 5345:     return(0);
 5346: }
 5347: 
 5348: /**
 5349:  * xmlFAParseRegExp:
 5350:  * @ctxt:  a regexp parser context
 5351:  * @top:  is this the top-level expression ?
 5352:  *
 5353:  * [1]   regExp   ::=     branch  ( '|' branch )*
 5354:  */
 5355: static void
 5356: xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
 5357:     xmlRegStatePtr start, end;
 5358: 
 5359:     /* if not top start should have been generated by an epsilon trans */
 5360:     start = ctxt->state;
 5361:     ctxt->end = NULL;
 5362:     xmlFAParseBranch(ctxt, NULL);
 5363:     if (top) {
 5364: #ifdef DEBUG_REGEXP_GRAPH
 5365: 	printf("State %d is final\n", ctxt->state->no);
 5366: #endif
 5367: 	ctxt->state->type = XML_REGEXP_FINAL_STATE;
 5368:     }
 5369:     if (CUR != '|') {
 5370: 	ctxt->end = ctxt->state;
 5371: 	return;
 5372:     }
 5373:     end = ctxt->state;
 5374:     while ((CUR == '|') && (ctxt->error == 0)) {
 5375: 	NEXT;
 5376: 	ctxt->state = start;
 5377: 	ctxt->end = NULL;
 5378: 	xmlFAParseBranch(ctxt, end);
 5379:     }
 5380:     if (!top) {
 5381: 	ctxt->state = end;
 5382: 	ctxt->end = end;
 5383:     }
 5384: }
 5385: 
 5386: /************************************************************************
 5387:  * 									*
 5388:  * 			The basic API					*
 5389:  * 									*
 5390:  ************************************************************************/
 5391: 
 5392: /**
 5393:  * xmlRegexpPrint:
 5394:  * @output: the file for the output debug
 5395:  * @regexp: the compiled regexp
 5396:  *
 5397:  * Print the content of the compiled regular expression
 5398:  */
 5399: void
 5400: xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
 5401:     int i;
 5402: 
 5403:     if (output == NULL)
 5404:         return;
 5405:     fprintf(output, " regexp: ");
 5406:     if (regexp == NULL) {
 5407: 	fprintf(output, "NULL\n");
 5408: 	return;
 5409:     }
 5410:     fprintf(output, "'%s' ", regexp->string);
 5411:     fprintf(output, "\n");
 5412:     fprintf(output, "%d atoms:\n", regexp->nbAtoms);
 5413:     for (i = 0;i < regexp->nbAtoms; i++) {
 5414: 	fprintf(output, " %02d ", i);
 5415: 	xmlRegPrintAtom(output, regexp->atoms[i]);
 5416:     }
 5417:     fprintf(output, "%d states:", regexp->nbStates);
 5418:     fprintf(output, "\n");
 5419:     for (i = 0;i < regexp->nbStates; i++) {
 5420: 	xmlRegPrintState(output, regexp->states[i]);
 5421:     }
 5422:     fprintf(output, "%d counters:\n", regexp->nbCounters);
 5423:     for (i = 0;i < regexp->nbCounters; i++) {
 5424: 	fprintf(output, " %d: min %d max %d\n", i, regexp->counters[i].min,
 5425: 		                                regexp->counters[i].max);
 5426:     }
 5427: }
 5428: 
 5429: /**
 5430:  * xmlRegexpCompile:
 5431:  * @regexp:  a regular expression string
 5432:  *
 5433:  * Parses a regular expression conforming to XML Schemas Part 2 Datatype
 5434:  * Appendix F and builds an automata suitable for testing strings against
 5435:  * that regular expression
 5436:  *
 5437:  * Returns the compiled expression or NULL in case of error
 5438:  */
 5439: xmlRegexpPtr
 5440: xmlRegexpCompile(const xmlChar *regexp) {
 5441:     xmlRegexpPtr ret;
 5442:     xmlRegParserCtxtPtr ctxt;
 5443: 
 5444:     ctxt = xmlRegNewParserCtxt(regexp);
 5445:     if (ctxt == NULL)
 5446: 	return(NULL);
 5447: 
 5448:     /* initialize the parser */
 5449:     ctxt->end = NULL;
 5450:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5451:     xmlRegStatePush(ctxt, ctxt->start);
 5452: 
 5453:     /* parse the expression building an automata */
 5454:     xmlFAParseRegExp(ctxt, 1);
 5455:     if (CUR != 0) {
 5456: 	ERROR("xmlFAParseRegExp: extra characters");
 5457:     }
 5458:     if (ctxt->error != 0) {
 5459: 	xmlRegFreeParserCtxt(ctxt);
 5460: 	return(NULL);
 5461:     }
 5462:     ctxt->end = ctxt->state;
 5463:     ctxt->start->type = XML_REGEXP_START_STATE;
 5464:     ctxt->end->type = XML_REGEXP_FINAL_STATE;
 5465: 
 5466:     /* remove the Epsilon except for counted transitions */
 5467:     xmlFAEliminateEpsilonTransitions(ctxt);
 5468: 
 5469: 
 5470:     if (ctxt->error != 0) {
 5471: 	xmlRegFreeParserCtxt(ctxt);
 5472: 	return(NULL);
 5473:     }
 5474:     ret = xmlRegEpxFromParse(ctxt);
 5475:     xmlRegFreeParserCtxt(ctxt);
 5476:     return(ret);
 5477: }
 5478: 
 5479: /**
 5480:  * xmlRegexpExec:
 5481:  * @comp:  the compiled regular expression
 5482:  * @content:  the value to check against the regular expression
 5483:  *
 5484:  * Check if the regular expression generates the value
 5485:  *
 5486:  * Returns 1 if it matches, 0 if not and a negative value in case of error
 5487:  */
 5488: int
 5489: xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
 5490:     if ((comp == NULL) || (content == NULL))
 5491: 	return(-1);
 5492:     return(xmlFARegExec(comp, content));
 5493: }
 5494: 
 5495: /**
 5496:  * xmlRegexpIsDeterminist:
 5497:  * @comp:  the compiled regular expression
 5498:  *
 5499:  * Check if the regular expression is determinist
 5500:  *
 5501:  * Returns 1 if it yes, 0 if not and a negative value in case of error
 5502:  */
 5503: int
 5504: xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
 5505:     xmlAutomataPtr am;
 5506:     int ret;
 5507: 
 5508:     if (comp == NULL)
 5509: 	return(-1);
 5510:     if (comp->determinist != -1)
 5511: 	return(comp->determinist);
 5512: 
 5513:     am = xmlNewAutomata();
 5514:     if (am->states != NULL) {
 5515: 	int i;
 5516: 
 5517: 	for (i = 0;i < am->nbStates;i++)
 5518: 	    xmlRegFreeState(am->states[i]);
 5519: 	xmlFree(am->states);
 5520:     }
 5521:     am->nbAtoms = comp->nbAtoms;
 5522:     am->atoms = comp->atoms;
 5523:     am->nbStates = comp->nbStates;
 5524:     am->states = comp->states;
 5525:     am->determinist = -1;
 5526:     am->flags = comp->flags;
 5527:     ret = xmlFAComputesDeterminism(am);
 5528:     am->atoms = NULL;
 5529:     am->states = NULL;
 5530:     xmlFreeAutomata(am);
 5531:     comp->determinist = ret;
 5532:     return(ret);
 5533: }
 5534: 
 5535: /**
 5536:  * xmlRegFreeRegexp:
 5537:  * @regexp:  the regexp
 5538:  *
 5539:  * Free a regexp
 5540:  */
 5541: void
 5542: xmlRegFreeRegexp(xmlRegexpPtr regexp) {
 5543:     int i;
 5544:     if (regexp == NULL)
 5545: 	return;
 5546: 
 5547:     if (regexp->string != NULL)
 5548: 	xmlFree(regexp->string);
 5549:     if (regexp->states != NULL) {
 5550: 	for (i = 0;i < regexp->nbStates;i++)
 5551: 	    xmlRegFreeState(regexp->states[i]);
 5552: 	xmlFree(regexp->states);
 5553:     }
 5554:     if (regexp->atoms != NULL) {
 5555: 	for (i = 0;i < regexp->nbAtoms;i++)
 5556: 	    xmlRegFreeAtom(regexp->atoms[i]);
 5557: 	xmlFree(regexp->atoms);
 5558:     }
 5559:     if (regexp->counters != NULL)
 5560: 	xmlFree(regexp->counters);
 5561:     if (regexp->compact != NULL)
 5562: 	xmlFree(regexp->compact);
 5563:     if (regexp->transdata != NULL)
 5564: 	xmlFree(regexp->transdata);
 5565:     if (regexp->stringMap != NULL) {
 5566: 	for (i = 0; i < regexp->nbstrings;i++)
 5567: 	    xmlFree(regexp->stringMap[i]);
 5568: 	xmlFree(regexp->stringMap);
 5569:     }
 5570: 
 5571:     xmlFree(regexp);
 5572: }
 5573: 
 5574: #ifdef LIBXML_AUTOMATA_ENABLED
 5575: /************************************************************************
 5576:  * 									*
 5577:  * 			The Automata interface				*
 5578:  * 									*
 5579:  ************************************************************************/
 5580: 
 5581: /**
 5582:  * xmlNewAutomata:
 5583:  *
 5584:  * Create a new automata
 5585:  *
 5586:  * Returns the new object or NULL in case of failure
 5587:  */
 5588: xmlAutomataPtr
 5589: xmlNewAutomata(void) {
 5590:     xmlAutomataPtr ctxt;
 5591: 
 5592:     ctxt = xmlRegNewParserCtxt(NULL);
 5593:     if (ctxt == NULL)
 5594: 	return(NULL);
 5595: 
 5596:     /* initialize the parser */
 5597:     ctxt->end = NULL;
 5598:     ctxt->start = ctxt->state = xmlRegNewState(ctxt);
 5599:     if (ctxt->start == NULL) {
 5600: 	xmlFreeAutomata(ctxt);
 5601: 	return(NULL);
 5602:     }
 5603:     ctxt->start->type = XML_REGEXP_START_STATE;
 5604:     if (xmlRegStatePush(ctxt, ctxt->start) < 0) {
 5605:         xmlRegFreeState(ctxt->start);
 5606: 	xmlFreeAutomata(ctxt);
 5607: 	return(NULL);
 5608:     }
 5609:     ctxt->flags = 0;
 5610: 
 5611:     return(ctxt);
 5612: }
 5613: 
 5614: /**
 5615:  * xmlFreeAutomata:
 5616:  * @am: an automata
 5617:  *
 5618:  * Free an automata
 5619:  */
 5620: void
 5621: xmlFreeAutomata(xmlAutomataPtr am) {
 5622:     if (am == NULL)
 5623: 	return;
 5624:     xmlRegFreeParserCtxt(am);
 5625: }
 5626: 
 5627: /**
 5628:  * xmlAutomataSetFlags:
 5629:  * @am: an automata
 5630:  * @flags:  a set of internal flags
 5631:  *
 5632:  * Set some flags on the automata
 5633:  */
 5634: void
 5635: xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
 5636:     if (am == NULL)
 5637: 	return;
 5638:     am->flags |= flags;
 5639: }
 5640: 
 5641: /**
 5642:  * xmlAutomataGetInitState:
 5643:  * @am: an automata
 5644:  *
 5645:  * Initial state lookup
 5646:  *
 5647:  * Returns the initial state of the automata
 5648:  */
 5649: xmlAutomataStatePtr
 5650: xmlAutomataGetInitState(xmlAutomataPtr am) {
 5651:     if (am == NULL)
 5652: 	return(NULL);
 5653:     return(am->start);
 5654: }
 5655: 
 5656: /**
 5657:  * xmlAutomataSetFinalState:
 5658:  * @am: an automata
 5659:  * @state: a state in this automata
 5660:  *
 5661:  * Makes that state a final state
 5662:  *
 5663:  * Returns 0 or -1 in case of error
 5664:  */
 5665: int
 5666: xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
 5667:     if ((am == NULL) || (state == NULL))
 5668: 	return(-1);
 5669:     state->type = XML_REGEXP_FINAL_STATE;
 5670:     return(0);
 5671: }
 5672: 
 5673: /**
 5674:  * xmlAutomataNewTransition:
 5675:  * @am: an automata
 5676:  * @from: the starting point of the transition
 5677:  * @to: the target point of the transition or NULL
 5678:  * @token: the input string associated to that transition
 5679:  * @data: data passed to the callback function if the transition is activated
 5680:  *
 5681:  * If @to is NULL, this creates first a new target state in the automata
 5682:  * and then adds a transition from the @from state to the target state
 5683:  * activated by the value of @token
 5684:  *
 5685:  * Returns the target state or NULL in case of error
 5686:  */
 5687: xmlAutomataStatePtr
 5688: xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5689: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5690: 			 void *data) {
 5691:     xmlRegAtomPtr atom;
 5692: 
 5693:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5694: 	return(NULL);
 5695:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5696:     if (atom == NULL)
 5697:         return(NULL);
 5698:     atom->data = data;
 5699:     if (atom == NULL)
 5700: 	return(NULL);
 5701:     atom->valuep = xmlStrdup(token);
 5702: 
 5703:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5704:         xmlRegFreeAtom(atom);
 5705: 	return(NULL);
 5706:     }
 5707:     if (to == NULL)
 5708: 	return(am->state);
 5709:     return(to);
 5710: }
 5711: 
 5712: /**
 5713:  * xmlAutomataNewTransition2:
 5714:  * @am: an automata
 5715:  * @from: the starting point of the transition
 5716:  * @to: the target point of the transition or NULL
 5717:  * @token: the first input string associated to that transition
 5718:  * @token2: the second input string associated to that transition
 5719:  * @data: data passed to the callback function if the transition is activated
 5720:  *
 5721:  * If @to is NULL, this creates first a new target state in the automata
 5722:  * and then adds a transition from the @from state to the target state
 5723:  * activated by the value of @token
 5724:  *
 5725:  * Returns the target state or NULL in case of error
 5726:  */
 5727: xmlAutomataStatePtr
 5728: xmlAutomataNewTransition2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5729: 			  xmlAutomataStatePtr to, const xmlChar *token,
 5730: 			  const xmlChar *token2, void *data) {
 5731:     xmlRegAtomPtr atom;
 5732: 
 5733:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5734: 	return(NULL);
 5735:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5736:     if (atom == NULL)
 5737: 	return(NULL);
 5738:     atom->data = data;
 5739:     if ((token2 == NULL) || (*token2 == 0)) {
 5740: 	atom->valuep = xmlStrdup(token);
 5741:     } else {
 5742: 	int lenn, lenp;
 5743: 	xmlChar *str;
 5744: 
 5745: 	lenn = strlen((char *) token2);
 5746: 	lenp = strlen((char *) token);
 5747: 
 5748: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5749: 	if (str == NULL) {
 5750: 	    xmlRegFreeAtom(atom);
 5751: 	    return(NULL);
 5752: 	}
 5753: 	memcpy(&str[0], token, lenp);
 5754: 	str[lenp] = '|';
 5755: 	memcpy(&str[lenp + 1], token2, lenn);
 5756: 	str[lenn + lenp + 1] = 0;
 5757: 
 5758: 	atom->valuep = str;
 5759:     }
 5760: 
 5761:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5762:         xmlRegFreeAtom(atom);
 5763: 	return(NULL);
 5764:     }
 5765:     if (to == NULL)
 5766: 	return(am->state);
 5767:     return(to);
 5768: }
 5769: 
 5770: /**
 5771:  * xmlAutomataNewNegTrans:
 5772:  * @am: an automata
 5773:  * @from: the starting point of the transition
 5774:  * @to: the target point of the transition or NULL
 5775:  * @token: the first input string associated to that transition
 5776:  * @token2: the second input string associated to that transition
 5777:  * @data: data passed to the callback function if the transition is activated
 5778:  *
 5779:  * If @to is NULL, this creates first a new target state in the automata
 5780:  * and then adds a transition from the @from state to the target state
 5781:  * activated by any value except (@token,@token2)
 5782:  * Note that if @token2 is not NULL, then (X, NULL) won't match to follow
 5783:  # the semantic of XSD ##other
 5784:  *
 5785:  * Returns the target state or NULL in case of error
 5786:  */
 5787: xmlAutomataStatePtr
 5788: xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5789: 		       xmlAutomataStatePtr to, const xmlChar *token,
 5790: 		       const xmlChar *token2, void *data) {
 5791:     xmlRegAtomPtr atom;
 5792:     xmlChar err_msg[200];
 5793: 
 5794:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5795: 	return(NULL);
 5796:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5797:     if (atom == NULL)
 5798: 	return(NULL);
 5799:     atom->data = data;
 5800:     atom->neg = 1;
 5801:     if ((token2 == NULL) || (*token2 == 0)) {
 5802: 	atom->valuep = xmlStrdup(token);
 5803:     } else {
 5804: 	int lenn, lenp;
 5805: 	xmlChar *str;
 5806: 
 5807: 	lenn = strlen((char *) token2);
 5808: 	lenp = strlen((char *) token);
 5809: 
 5810: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5811: 	if (str == NULL) {
 5812: 	    xmlRegFreeAtom(atom);
 5813: 	    return(NULL);
 5814: 	}
 5815: 	memcpy(&str[0], token, lenp);
 5816: 	str[lenp] = '|';
 5817: 	memcpy(&str[lenp + 1], token2, lenn);
 5818: 	str[lenn + lenp + 1] = 0;
 5819: 
 5820: 	atom->valuep = str;
 5821:     }
 5822:     snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
 5823:     err_msg[199] = 0;
 5824:     atom->valuep2 = xmlStrdup(err_msg);
 5825: 
 5826:     if (xmlFAGenerateTransitions(am, from, to, atom) < 0) {
 5827:         xmlRegFreeAtom(atom);
 5828: 	return(NULL);
 5829:     }
 5830:     am->negs++;
 5831:     if (to == NULL)
 5832: 	return(am->state);
 5833:     return(to);
 5834: }
 5835: 
 5836: /**
 5837:  * xmlAutomataNewCountTrans2:
 5838:  * @am: an automata
 5839:  * @from: the starting point of the transition
 5840:  * @to: the target point of the transition or NULL
 5841:  * @token: the input string associated to that transition
 5842:  * @token2: the second input string associated to that transition
 5843:  * @min:  the minimum successive occurences of token
 5844:  * @max:  the maximum successive occurences of token
 5845:  * @data:  data associated to the transition
 5846:  *
 5847:  * If @to is NULL, this creates first a new target state in the automata
 5848:  * and then adds a transition from the @from state to the target state
 5849:  * activated by a succession of input of value @token and @token2 and 
 5850:  * whose number is between @min and @max
 5851:  *
 5852:  * Returns the target state or NULL in case of error
 5853:  */
 5854: xmlAutomataStatePtr
 5855: xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5856: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5857: 			 const xmlChar *token2,
 5858: 			 int min, int max, void *data) {
 5859:     xmlRegAtomPtr atom;
 5860:     int counter;
 5861: 
 5862:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5863: 	return(NULL);
 5864:     if (min < 0)
 5865: 	return(NULL);
 5866:     if ((max < min) || (max < 1))
 5867: 	return(NULL);
 5868:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5869:     if (atom == NULL)
 5870: 	return(NULL);
 5871:     if ((token2 == NULL) || (*token2 == 0)) {
 5872: 	atom->valuep = xmlStrdup(token);
 5873:     } else {
 5874: 	int lenn, lenp;
 5875: 	xmlChar *str;
 5876: 
 5877: 	lenn = strlen((char *) token2);
 5878: 	lenp = strlen((char *) token);
 5879: 
 5880: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 5881: 	if (str == NULL) {
 5882: 	    xmlRegFreeAtom(atom);
 5883: 	    return(NULL);
 5884: 	}
 5885: 	memcpy(&str[0], token, lenp);
 5886: 	str[lenp] = '|';
 5887: 	memcpy(&str[lenp + 1], token2, lenn);
 5888: 	str[lenn + lenp + 1] = 0;
 5889: 
 5890: 	atom->valuep = str;
 5891:     }
 5892:     atom->data = data;
 5893:     if (min == 0)
 5894: 	atom->min = 1;
 5895:     else
 5896: 	atom->min = min;
 5897:     atom->max = max;
 5898: 
 5899:     /*
 5900:      * associate a counter to the transition.
 5901:      */
 5902:     counter = xmlRegGetCounter(am);
 5903:     am->counters[counter].min = min;
 5904:     am->counters[counter].max = max;
 5905: 
 5906:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5907:     if (to == NULL) {
 5908:         to = xmlRegNewState(am);
 5909: 	xmlRegStatePush(am, to);
 5910:     }
 5911:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5912:     xmlRegAtomPush(am, atom);
 5913:     am->state = to;
 5914: 
 5915:     if (to == NULL)
 5916: 	to = am->state;
 5917:     if (to == NULL)
 5918: 	return(NULL);
 5919:     if (min == 0)
 5920: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5921:     return(to);
 5922: }
 5923: 
 5924: /**
 5925:  * xmlAutomataNewCountTrans:
 5926:  * @am: an automata
 5927:  * @from: the starting point of the transition
 5928:  * @to: the target point of the transition or NULL
 5929:  * @token: the input string associated to that transition
 5930:  * @min:  the minimum successive occurences of token
 5931:  * @max:  the maximum successive occurences of token
 5932:  * @data:  data associated to the transition
 5933:  *
 5934:  * If @to is NULL, this creates first a new target state in the automata
 5935:  * and then adds a transition from the @from state to the target state
 5936:  * activated by a succession of input of value @token and whose number
 5937:  * is between @min and @max
 5938:  *
 5939:  * Returns the target state or NULL in case of error
 5940:  */
 5941: xmlAutomataStatePtr
 5942: xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 5943: 			 xmlAutomataStatePtr to, const xmlChar *token,
 5944: 			 int min, int max, void *data) {
 5945:     xmlRegAtomPtr atom;
 5946:     int counter;
 5947: 
 5948:     if ((am == NULL) || (from == NULL) || (token == NULL))
 5949: 	return(NULL);
 5950:     if (min < 0)
 5951: 	return(NULL);
 5952:     if ((max < min) || (max < 1))
 5953: 	return(NULL);
 5954:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 5955:     if (atom == NULL)
 5956: 	return(NULL);
 5957:     atom->valuep = xmlStrdup(token);
 5958:     atom->data = data;
 5959:     if (min == 0)
 5960: 	atom->min = 1;
 5961:     else
 5962: 	atom->min = min;
 5963:     atom->max = max;
 5964: 
 5965:     /*
 5966:      * associate a counter to the transition.
 5967:      */
 5968:     counter = xmlRegGetCounter(am);
 5969:     am->counters[counter].min = min;
 5970:     am->counters[counter].max = max;
 5971: 
 5972:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 5973:     if (to == NULL) {
 5974:         to = xmlRegNewState(am);
 5975: 	xmlRegStatePush(am, to);
 5976:     }
 5977:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 5978:     xmlRegAtomPush(am, atom);
 5979:     am->state = to;
 5980: 
 5981:     if (to == NULL)
 5982: 	to = am->state;
 5983:     if (to == NULL)
 5984: 	return(NULL);
 5985:     if (min == 0)
 5986: 	xmlFAGenerateEpsilonTransition(am, from, to);
 5987:     return(to);
 5988: }
 5989: 
 5990: /**
 5991:  * xmlAutomataNewOnceTrans2:
 5992:  * @am: an automata
 5993:  * @from: the starting point of the transition
 5994:  * @to: the target point of the transition or NULL
 5995:  * @token: the input string associated to that transition
 5996:  * @token2: the second input string associated to that transition
 5997:  * @min:  the minimum successive occurences of token
 5998:  * @max:  the maximum successive occurences of token
 5999:  * @data:  data associated to the transition
 6000:  *
 6001:  * If @to is NULL, this creates first a new target state in the automata
 6002:  * and then adds a transition from the @from state to the target state
 6003:  * activated by a succession of input of value @token and @token2 and whose 
 6004:  * number is between @min and @max, moreover that transition can only be 
 6005:  * crossed once.
 6006:  *
 6007:  * Returns the target state or NULL in case of error
 6008:  */
 6009: xmlAutomataStatePtr
 6010: xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6011: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6012: 			 const xmlChar *token2,
 6013: 			 int min, int max, void *data) {
 6014:     xmlRegAtomPtr atom;
 6015:     int counter;
 6016: 
 6017:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6018: 	return(NULL);
 6019:     if (min < 1)
 6020: 	return(NULL);
 6021:     if ((max < min) || (max < 1))
 6022: 	return(NULL);
 6023:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6024:     if (atom == NULL)
 6025: 	return(NULL);
 6026:     if ((token2 == NULL) || (*token2 == 0)) {
 6027: 	atom->valuep = xmlStrdup(token);
 6028:     } else {
 6029: 	int lenn, lenp;
 6030: 	xmlChar *str;
 6031: 
 6032: 	lenn = strlen((char *) token2);
 6033: 	lenp = strlen((char *) token);
 6034: 
 6035: 	str = (xmlChar *) xmlMallocAtomic(lenn + lenp + 2);
 6036: 	if (str == NULL) {
 6037: 	    xmlRegFreeAtom(atom);
 6038: 	    return(NULL);
 6039: 	}
 6040: 	memcpy(&str[0], token, lenp);
 6041: 	str[lenp] = '|';
 6042: 	memcpy(&str[lenp + 1], token2, lenn);
 6043: 	str[lenn + lenp + 1] = 0;
 6044: 
 6045: 	atom->valuep = str;
 6046:     }    
 6047:     atom->data = data;
 6048:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6049:     atom->min = min;
 6050:     atom->max = max;
 6051:     /*
 6052:      * associate a counter to the transition.
 6053:      */
 6054:     counter = xmlRegGetCounter(am);
 6055:     am->counters[counter].min = 1;
 6056:     am->counters[counter].max = 1;
 6057: 
 6058:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6059:     if (to == NULL) {
 6060: 	to = xmlRegNewState(am);
 6061: 	xmlRegStatePush(am, to);
 6062:     }
 6063:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6064:     xmlRegAtomPush(am, atom);
 6065:     am->state = to;
 6066:     return(to);
 6067: }
 6068: 
 6069:     
 6070: 
 6071: /**
 6072:  * xmlAutomataNewOnceTrans:
 6073:  * @am: an automata
 6074:  * @from: the starting point of the transition
 6075:  * @to: the target point of the transition or NULL
 6076:  * @token: the input string associated to that transition
 6077:  * @min:  the minimum successive occurences of token
 6078:  * @max:  the maximum successive occurences of token
 6079:  * @data:  data associated to the transition
 6080:  *
 6081:  * If @to is NULL, this creates first a new target state in the automata
 6082:  * and then adds a transition from the @from state to the target state
 6083:  * activated by a succession of input of value @token and whose number
 6084:  * is between @min and @max, moreover that transition can only be crossed
 6085:  * once.
 6086:  *
 6087:  * Returns the target state or NULL in case of error
 6088:  */
 6089: xmlAutomataStatePtr
 6090: xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6091: 			 xmlAutomataStatePtr to, const xmlChar *token,
 6092: 			 int min, int max, void *data) {
 6093:     xmlRegAtomPtr atom;
 6094:     int counter;
 6095: 
 6096:     if ((am == NULL) || (from == NULL) || (token == NULL))
 6097: 	return(NULL);
 6098:     if (min < 1)
 6099: 	return(NULL);
 6100:     if ((max < min) || (max < 1))
 6101: 	return(NULL);
 6102:     atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
 6103:     if (atom == NULL)
 6104: 	return(NULL);
 6105:     atom->valuep = xmlStrdup(token);
 6106:     atom->data = data;
 6107:     atom->quant = XML_REGEXP_QUANT_ONCEONLY;
 6108:     atom->min = min;
 6109:     atom->max = max;
 6110:     /*
 6111:      * associate a counter to the transition.
 6112:      */
 6113:     counter = xmlRegGetCounter(am);
 6114:     am->counters[counter].min = 1;
 6115:     am->counters[counter].max = 1;
 6116: 
 6117:     /* xmlFAGenerateTransitions(am, from, to, atom); */
 6118:     if (to == NULL) {
 6119: 	to = xmlRegNewState(am);
 6120: 	xmlRegStatePush(am, to);
 6121:     }
 6122:     xmlRegStateAddTrans(am, from, atom, to, counter, -1);
 6123:     xmlRegAtomPush(am, atom);
 6124:     am->state = to;
 6125:     return(to);
 6126: }
 6127: 
 6128: /**
 6129:  * xmlAutomataNewState:
 6130:  * @am: an automata
 6131:  *
 6132:  * Create a new disconnected state in the automata
 6133:  *
 6134:  * Returns the new state or NULL in case of error
 6135:  */
 6136: xmlAutomataStatePtr
 6137: xmlAutomataNewState(xmlAutomataPtr am) {
 6138:     xmlAutomataStatePtr to; 
 6139: 
 6140:     if (am == NULL)
 6141: 	return(NULL);
 6142:     to = xmlRegNewState(am);
 6143:     xmlRegStatePush(am, to);
 6144:     return(to);
 6145: }
 6146: 
 6147: /**
 6148:  * xmlAutomataNewEpsilon:
 6149:  * @am: an automata
 6150:  * @from: the starting point of the transition
 6151:  * @to: the target point of the transition or NULL
 6152:  *
 6153:  * If @to is NULL, this creates first a new target state in the automata
 6154:  * and then adds an epsilon transition from the @from state to the
 6155:  * target state
 6156:  *
 6157:  * Returns the target state or NULL in case of error
 6158:  */
 6159: xmlAutomataStatePtr
 6160: xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6161: 		      xmlAutomataStatePtr to) {
 6162:     if ((am == NULL) || (from == NULL))
 6163: 	return(NULL);
 6164:     xmlFAGenerateEpsilonTransition(am, from, to);
 6165:     if (to == NULL)
 6166: 	return(am->state);
 6167:     return(to);
 6168: }
 6169: 
 6170: /**
 6171:  * xmlAutomataNewAllTrans:
 6172:  * @am: an automata
 6173:  * @from: the starting point of the transition
 6174:  * @to: the target point of the transition or NULL
 6175:  * @lax: allow to transition if not all all transitions have been activated
 6176:  *
 6177:  * If @to is NULL, this creates first a new target state in the automata
 6178:  * and then adds a an ALL transition from the @from state to the
 6179:  * target state. That transition is an epsilon transition allowed only when
 6180:  * all transitions from the @from node have been activated.
 6181:  *
 6182:  * Returns the target state or NULL in case of error
 6183:  */
 6184: xmlAutomataStatePtr
 6185: xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6186: 		       xmlAutomataStatePtr to, int lax) {
 6187:     if ((am == NULL) || (from == NULL))
 6188: 	return(NULL);
 6189:     xmlFAGenerateAllTransition(am, from, to, lax);
 6190:     if (to == NULL)
 6191: 	return(am->state);
 6192:     return(to);
 6193: }
 6194: 
 6195: /**
 6196:  * xmlAutomataNewCounter:
 6197:  * @am: an automata
 6198:  * @min:  the minimal value on the counter
 6199:  * @max:  the maximal value on the counter
 6200:  *
 6201:  * Create a new counter
 6202:  *
 6203:  * Returns the counter number or -1 in case of error
 6204:  */
 6205: int		
 6206: xmlAutomataNewCounter(xmlAutomataPtr am, int min, int max) {
 6207:     int ret;
 6208: 
 6209:     if (am == NULL)
 6210: 	return(-1);
 6211: 
 6212:     ret = xmlRegGetCounter(am);
 6213:     if (ret < 0)
 6214: 	return(-1);
 6215:     am->counters[ret].min = min;
 6216:     am->counters[ret].max = max;
 6217:     return(ret);
 6218: }
 6219: 
 6220: /**
 6221:  * xmlAutomataNewCountedTrans:
 6222:  * @am: an automata
 6223:  * @from: the starting point of the transition
 6224:  * @to: the target point of the transition or NULL
 6225:  * @counter: the counter associated to that transition
 6226:  *
 6227:  * If @to is NULL, this creates first a new target state in the automata
 6228:  * and then adds an epsilon transition from the @from state to the target state
 6229:  * which will increment the counter provided
 6230:  *
 6231:  * Returns the target state or NULL in case of error
 6232:  */
 6233: xmlAutomataStatePtr
 6234: xmlAutomataNewCountedTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6235: 		xmlAutomataStatePtr to, int counter) {
 6236:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6237: 	return(NULL);
 6238:     xmlFAGenerateCountedEpsilonTransition(am, from, to, counter);
 6239:     if (to == NULL)
 6240: 	return(am->state);
 6241:     return(to);
 6242: }
 6243: 
 6244: /**
 6245:  * xmlAutomataNewCounterTrans:
 6246:  * @am: an automata
 6247:  * @from: the starting point of the transition
 6248:  * @to: the target point of the transition or NULL
 6249:  * @counter: the counter associated to that transition
 6250:  *
 6251:  * If @to is NULL, this creates first a new target state in the automata
 6252:  * and then adds an epsilon transition from the @from state to the target state
 6253:  * which will be allowed only if the counter is within the right range.
 6254:  *
 6255:  * Returns the target state or NULL in case of error
 6256:  */
 6257: xmlAutomataStatePtr
 6258: xmlAutomataNewCounterTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
 6259: 		xmlAutomataStatePtr to, int counter) {
 6260:     if ((am == NULL) || (from == NULL) || (counter < 0))
 6261: 	return(NULL);
 6262:     xmlFAGenerateCountedTransition(am, from, to, counter);
 6263:     if (to == NULL)
 6264: 	return(am->state);
 6265:     return(to);
 6266: }
 6267: 
 6268: /**
 6269:  * xmlAutomataCompile:
 6270:  * @am: an automata
 6271:  *
 6272:  * Compile the automata into a Reg Exp ready for being executed.
 6273:  * The automata should be free after this point.
 6274:  *
 6275:  * Returns the compiled regexp or NULL in case of error
 6276:  */
 6277: xmlRegexpPtr          
 6278: xmlAutomataCompile(xmlAutomataPtr am) {
 6279:     xmlRegexpPtr ret;
 6280: 
 6281:     if ((am == NULL) || (am->error != 0)) return(NULL);
 6282:     xmlFAEliminateEpsilonTransitions(am);
 6283:     /* xmlFAComputesDeterminism(am); */
 6284:     ret = xmlRegEpxFromParse(am);
 6285: 
 6286:     return(ret);
 6287: }
 6288: 
 6289: /**
 6290:  * xmlAutomataIsDeterminist:
 6291:  * @am: an automata
 6292:  *
 6293:  * Checks if an automata is determinist.
 6294:  *
 6295:  * Returns 1 if true, 0 if not, and -1 in case of error
 6296:  */
 6297: int          
 6298: xmlAutomataIsDeterminist(xmlAutomataPtr am) {
 6299:     int ret;
 6300: 
 6301:     if (am == NULL)
 6302: 	return(-1);
 6303: 
 6304:     ret = xmlFAComputesDeterminism(am);
 6305:     return(ret);
 6306: }
 6307: #endif /* LIBXML_AUTOMATA_ENABLED */
 6308: 
 6309: #ifdef LIBXML_EXPR_ENABLED
 6310: /************************************************************************
 6311:  *									*
 6312:  *		Formal Expression handling code				*
 6313:  *									*
 6314:  ************************************************************************/
 6315: /************************************************************************
 6316:  *									*
 6317:  *		Expression handling context				*
 6318:  *									*
 6319:  ************************************************************************/
 6320: 
 6321: struct _xmlExpCtxt {
 6322:     xmlDictPtr dict;
 6323:     xmlExpNodePtr *table;
 6324:     int size;
 6325:     int nbElems;
 6326:     int nb_nodes;
 6327:     int maxNodes;
 6328:     const char *expr;
 6329:     const char *cur;
 6330:     int nb_cons;
 6331:     int tabSize;
 6332: };
 6333: 
 6334: /**
 6335:  * xmlExpNewCtxt:
 6336:  * @maxNodes:  the maximum number of nodes
 6337:  * @dict:  optional dictionnary to use internally
 6338:  *
 6339:  * Creates a new context for manipulating expressions
 6340:  *
 6341:  * Returns the context or NULL in case of error
 6342:  */
 6343: xmlExpCtxtPtr
 6344: xmlExpNewCtxt(int maxNodes, xmlDictPtr dict) {
 6345:     xmlExpCtxtPtr ret;
 6346:     int size = 256;
 6347: 
 6348:     if (maxNodes <= 4096)
 6349:         maxNodes = 4096;
 6350:     
 6351:     ret = (xmlExpCtxtPtr) xmlMalloc(sizeof(xmlExpCtxt));
 6352:     if (ret == NULL)
 6353:         return(NULL);
 6354:     memset(ret, 0, sizeof(xmlExpCtxt));
 6355:     ret->size = size;
 6356:     ret->nbElems = 0;
 6357:     ret->maxNodes = maxNodes;
 6358:     ret->table = xmlMalloc(size * sizeof(xmlExpNodePtr));
 6359:     if (ret->table == NULL) {
 6360:         xmlFree(ret);
 6361: 	return(NULL);
 6362:     }
 6363:     memset(ret->table, 0, size * sizeof(xmlExpNodePtr));
 6364:     if (dict == NULL) {
 6365:         ret->dict = xmlDictCreate();
 6366: 	if (ret->dict == NULL) {
 6367: 	    xmlFree(ret->table);
 6368: 	    xmlFree(ret);
 6369: 	    return(NULL);
 6370: 	}
 6371:     } else {
 6372:         ret->dict = dict;
 6373: 	xmlDictReference(ret->dict);
 6374:     }
 6375:     return(ret);
 6376: }
 6377: 
 6378: /**
 6379:  * xmlExpFreeCtxt:
 6380:  * @ctxt:  an expression context
 6381:  *
 6382:  * Free an expression context
 6383:  */
 6384: void
 6385: xmlExpFreeCtxt(xmlExpCtxtPtr ctxt) {
 6386:     if (ctxt == NULL)
 6387:         return;
 6388:     xmlDictFree(ctxt->dict);
 6389:     if (ctxt->table != NULL)
 6390: 	xmlFree(ctxt->table);
 6391:     xmlFree(ctxt);
 6392: }
 6393: 
 6394: /************************************************************************
 6395:  *									*
 6396:  *		Structure associated to an expression node		*
 6397:  *									*
 6398:  ************************************************************************/
 /* Upper bound on the nb_nodes count of a context, see xmlExpNewNode() */
 #define MAX_NODES 10000
 6400: 
 6401: /* #define DEBUG_DERIV */
 6402: 
 6403: /*
 6404:  * TODO: 
 6405:  * - Wildcards
 6406:  * - public API for creation
 6407:  *
 6408:  * Started
 6409:  * - regression testing
 6410:  *
 6411:  * Done
 6412:  * - split into module and test tool
 6413:  * - memleaks
 6414:  */
 6415: 
 /* Bit flags OR-ed together in the info field of an expression node */
 typedef enum {
     XML_EXP_NILABLE = 0x01
 } xmlExpNodeInfo;

 /* Non-zero when the XML_EXP_NILABLE bit is set on @node */
 #define IS_NILLABLE(node) (((node)->info & XML_EXP_NILABLE) != 0)
 6421: 
 6422: struct _xmlExpNode {
 6423:     unsigned char type;/* xmlExpNodeType */
 6424:     unsigned char info;/* OR of xmlExpNodeInfo */
 6425:     unsigned short key;	/* the hash key */
 6426:     unsigned int ref;	/* The number of references */
 6427:     int c_max;		/* the maximum length it can consume */
 6428:     xmlExpNodePtr exp_left;
 6429:     xmlExpNodePtr next;/* the next node in the hash table or free list */
 6430:     union {
 6431: 	struct {
 6432: 	    int f_min;
 6433: 	    int f_max;
 6434: 	} count;
 6435: 	struct {
 6436: 	    xmlExpNodePtr f_right;
 6437: 	} children;
 6438:         const xmlChar *f_str;
 6439:     } field;
 6440: };
 6441: 
 6442: #define exp_min field.count.f_min
 6443: #define exp_max field.count.f_max
 6444: /* #define exp_left field.children.f_left */
 6445: #define exp_right field.children.f_right
 6446: #define exp_str field.f_str
 6447: 
 6448: static xmlExpNodePtr xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type);
 6449: static xmlExpNode forbiddenExpNode = {
 6450:     XML_EXP_FORBID, 0, 0, 0, 0, NULL, NULL, {{ 0, 0}}
 6451: };
 6452: xmlExpNodePtr forbiddenExp = &forbiddenExpNode;
 6453: static xmlExpNode emptyExpNode = {
 6454:     XML_EXP_EMPTY, 1, 0, 0, 0, NULL, NULL, {{ 0, 0}}
 6455: };
 6456: xmlExpNodePtr emptyExp = &emptyExpNode;
 6457: 
 6458: /************************************************************************
 6459:  *									*
 6460:  *  The custom hash table for unicity and canonicalization		*
 6461:  *  of sub-expressions pointers						*
 6462:  *									*
 6463:  ************************************************************************/
 6464: /*
 6465:  * xmlExpHashNameComputeKey:
 6466:  * Calculate the hash key for a token
 6467:  */
 6468: static unsigned short
 6469: xmlExpHashNameComputeKey(const xmlChar *name) {
 6470:     unsigned short value = 0L;
 6471:     char ch;
 6472:     
 6473:     if (name != NULL) {
 6474: 	value += 30 * (*name);
 6475: 	while ((ch = *name++) != 0) {
 6476: 	    value = value ^ ((value << 5) + (value >> 3) + (unsigned long)ch);
 6477: 	}
 6478:     }
 6479:     return (value);
 6480: }
 6481: 
 6482: /*
 6483:  * xmlExpHashComputeKey:
 6484:  * Calculate the hash key for a compound expression
 6485:  */
 6486: static unsigned short
 6487: xmlExpHashComputeKey(xmlExpNodeType type, xmlExpNodePtr left,
 6488:                      xmlExpNodePtr right) {
 6489:     unsigned long value;
 6490:     unsigned short ret;
 6491:     
 6492:     switch (type) {
 6493:         case XML_EXP_SEQ:
 6494: 	    value = left->key;
 6495: 	    value += right->key;
 6496: 	    value *= 3;
 6497: 	    ret = (unsigned short) value;
 6498: 	    break;
 6499:         case XML_EXP_OR:
 6500: 	    value = left->key;
 6501: 	    value += right->key;
 6502: 	    value *= 7;
 6503: 	    ret = (unsigned short) value;
 6504: 	    break;
 6505:         case XML_EXP_COUNT:
 6506: 	    value = left->key;
 6507: 	    value += right->key;
 6508: 	    ret = (unsigned short) value;
 6509: 	    break;
 6510: 	default:
 6511: 	    ret = 0;
 6512:     }
 6513:     return(ret);
 6514: }
 6515: 
 6516: 
 6517: static xmlExpNodePtr
 6518: xmlExpNewNode(xmlExpCtxtPtr ctxt, xmlExpNodeType type) {
 6519:     xmlExpNodePtr ret;
 6520: 
 6521:     if (ctxt->nb_nodes >= MAX_NODES)
 6522:         return(NULL);
 6523:     ret = (xmlExpNodePtr) xmlMalloc(sizeof(xmlExpNode));
 6524:     if (ret == NULL)
 6525:         return(NULL);
 6526:     memset(ret, 0, sizeof(xmlExpNode));
 6527:     ret->type = type;
 6528:     ret->next = NULL;
 6529:     ctxt->nb_nodes++;
 6530:     ctxt->nb_cons++;
 6531:     return(ret);
 6532: }
 6533: 
 6534: /**
 6535:  * xmlExpHashGetEntry:
 6536:  * @table: the hash table
 6537:  *
 6538:  * Get the unique entry from the hash table. The entry is created if
 6539:  * needed. @left and @right are consumed, i.e. their ref count will
 6540:  * be decremented by the operation.
 6541:  *
 6542:  * Returns the pointer or NULL in case of error
 6543:  */
 6544: static xmlExpNodePtr
 6545: xmlExpHashGetEntry(xmlExpCtxtPtr ctxt, xmlExpNodeType type,
 6546:                    xmlExpNodePtr left, xmlExpNodePtr right,
 6547: 		   const xmlChar *name, int min, int max) {
 6548:     unsigned short kbase, key;
 6549:     xmlExpNodePtr entry;
 6550:     xmlExpNodePtr insert;
 6551: 
 6552:     if (ctxt == NULL)
 6553: 	return(NULL);
 6554: 
 6555:     /*
 6556:      * Check for duplicate and insertion location.
 6557:      */
 6558:     if (type == XML_EXP_ATOM) {
 6559: 	kbase = xmlExpHashNameComputeKey(name);
 6560:     } else if (type == XML_EXP_COUNT) {
 6561:         /* COUNT reduction rule 1 */
 6562: 	/* a{1} -> a */
 6563: 	if (min == max) {
 6564: 	    if (min == 1) {
 6565: 		return(left);
 6566: 	    }
 6567: 	    if (min == 0) {
 6568: 		xmlExpFree(ctxt, left);
 6569: 	        return(emptyExp);
 6570: 	    }
 6571: 	}
 6572: 	if (min < 0) {
 6573: 	    xmlExpFree(ctxt, left);
 6574: 	    return(forbiddenExp);
 6575: 	}
 6576:         if (max == -1)
 6577: 	    kbase = min + 79;
 6578: 	else
 6579: 	    kbase = max - min;
 6580: 	kbase += left->key;
 6581:     } else if (type == XML_EXP_OR) {
 6582:         /* Forbid reduction rules */
 6583:         if (left->type == XML_EXP_FORBID) {
 6584: 	    xmlExpFree(ctxt, left);
 6585: 	    return(right);
 6586: 	}
 6587:         if (right->type == XML_EXP_FORBID) {
 6588: 	    xmlExpFree(ctxt, right);
 6589: 	    return(left);
 6590: 	}
 6591: 
 6592:         /* OR reduction rule 1 */
 6593: 	/* a | a reduced to a */
 6594:         if (left == right) {
 6595: 	    left->ref--;
 6596: 	    return(left);
 6597: 	}
 6598:         /* OR canonicalization rule 1 */
 6599: 	/* linearize (a | b) | c into a | (b | c) */
 6600:         if ((left->type == XML_EXP_OR) && (right->type != XML_EXP_OR)) {
 6601: 	    xmlExpNodePtr tmp = left;
 6602:             left = right;
 6603: 	    right = tmp;
 6604: 	}
 6605:         /* OR reduction rule 2 */
 6606: 	/* a | (a | b) and b | (a | b) are reduced to a | b */
 6607:         if (right->type == XML_EXP_OR) {
 6608: 	    if ((left == right->exp_left) ||
 6609: 	        (left == right->exp_right)) {
 6610: 		xmlExpFree(ctxt, left);
 6611: 		return(right);
 6612: 	    }
 6613: 	}
 6614:         /* OR canonicalization rule 2 */
 6615: 	/* linearize (a | b) | c into a | (b | c) */
 6616:         if (left->type == XML_EXP_OR) {
 6617: 	    xmlExpNodePtr tmp;
 6618: 
 6619: 	    /* OR canonicalization rule 2 */
 6620: 	    if ((left->exp_right->type != XML_EXP_OR) &&
 6621: 	        (left->exp_right->key < left->exp_left->key)) {
 6622: 	        tmp = left->exp_right;
 6623: 		left->exp_right = left->exp_left;
 6624: 		left->exp_left = tmp;
 6625: 	    }
 6626: 	    left->exp_right->ref++;
 6627: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_right, right,
 6628: 	                             NULL, 0, 0);
 6629: 	    left->exp_left->ref++;
 6630: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left->exp_left, tmp,
 6631: 	                             NULL, 0, 0);
 6632: 	
 6633: 	    xmlExpFree(ctxt, left);
 6634: 	    return(tmp);
 6635: 	}
 6636: 	if (right->type == XML_EXP_OR) {
 6637: 	    /* Ordering in the tree */
 6638: 	    /* C | (A | B) -> A | (B | C) */
 6639: 	    if (left->key > right->exp_right->key) {
 6640: 		xmlExpNodePtr tmp;
 6641: 		right->exp_right->ref++;
 6642: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_right,
 6643: 		                         left, NULL, 0, 0);
 6644: 		right->exp_left->ref++;
 6645: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6646: 		                         tmp, NULL, 0, 0);
 6647: 		xmlExpFree(ctxt, right);
 6648: 		return(tmp);
 6649: 	    }
 6650: 	    /* Ordering in the tree */
 6651: 	    /* B | (A | C) -> A | (B | C) */
 6652: 	    if (left->key > right->exp_left->key) {
 6653: 		xmlExpNodePtr tmp;
 6654: 		right->exp_right->ref++;
 6655: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, left,
 6656: 		                         right->exp_right, NULL, 0, 0);
 6657: 		right->exp_left->ref++;
 6658: 		tmp = xmlExpHashGetEntry(ctxt, XML_EXP_OR, right->exp_left,
 6659: 		                         tmp, NULL, 0, 0);
 6660: 		xmlExpFree(ctxt, right);
 6661: 		return(tmp);
 6662: 	    }
 6663: 	}
 6664: 	/* we know both types are != XML_EXP_OR here */
 6665:         else if (left->key > right->key) {
 6666: 	    xmlExpNodePtr tmp = left;
 6667:             left = right;
 6668: 	    right = tmp;
 6669: 	}
 6670: 	kbase = xmlExpHashComputeKey(type, left, right);
 6671:     } else if (type == XML_EXP_SEQ) {
 6672:         /* Forbid reduction rules */
 6673:         if (left->type == XML_EXP_FORBID) {
 6674: 	    xmlExpFree(ctxt, right);
 6675: 	    return(left);
 6676: 	}
 6677:         if (right->type == XML_EXP_FORBID) {
 6678: 	    xmlExpFree(ctxt, left);
 6679: 	    return(right);
 6680: 	}
 6681:         /* Empty reduction rules */
 6682:         if (right->type == XML_EXP_EMPTY) {
 6683: 	    return(left);
 6684: 	}
 6685:         if (left->type == XML_EXP_EMPTY) {
 6686: 	    return(right);
 6687: 	}
 6688: 	kbase = xmlExpHashComputeKey(type, left, right);
 6689:     } else 
 6690:         return(NULL);
 6691: 
 6692:     key = kbase % ctxt->size;
 6693:     if (ctxt->table[key] != NULL) {
 6694: 	for (insert = ctxt->table[key]; insert != NULL;
 6695: 	     insert = insert->next) {
 6696: 	    if ((insert->key == kbase) &&
 6697: 	        (insert->type == type)) {
 6698: 		if (type == XML_EXP_ATOM) {
 6699: 		    if (name == insert->exp_str) {
 6700: 			insert->ref++;
 6701: 			return(insert);
 6702: 		    }
 6703: 		} else if (type == XML_EXP_COUNT) {
 6704: 		    if ((insert->exp_min == min) && (insert->exp_max == max) &&
 6705: 		        (insert->exp_left == left)) {
 6706: 			insert->ref++;
 6707: 			left->ref--;
 6708: 			return(insert);
 6709: 		    }
 6710: 		} else if ((insert->exp_left == left) &&
 6711: 			   (insert->exp_right == right)) {
 6712: 		    insert->ref++;
 6713: 		    left->ref--;
 6714: 		    right->ref--;
 6715: 		    return(insert);
 6716: 		}
 6717: 	    }
 6718: 	}
 6719:     }
 6720: 
 6721:     entry = xmlExpNewNode(ctxt, type);
 6722:     if (entry == NULL)
 6723:         return(NULL);
 6724:     entry->key = kbase;
 6725:     if (type == XML_EXP_ATOM) {
 6726: 	entry->exp_str = name;
 6727: 	entry->c_max = 1;
 6728:     } else if (type == XML_EXP_COUNT) {
 6729:         entry->exp_min = min;
 6730:         entry->exp_max = max;
 6731: 	entry->exp_left = left;
 6732: 	if ((min == 0) || (IS_NILLABLE(left)))
 6733: 	    entry->info |= XML_EXP_NILABLE;
 6734: 	if (max < 0)
 6735: 	    entry->c_max = -1;
 6736: 	else
 6737: 	    entry->c_max = max * entry->exp_left->c_max;
 6738:     } else {
 6739: 	entry->exp_left = left;
 6740: 	entry->exp_right = right;
 6741: 	if (type == XML_EXP_OR) {
 6742: 	    if ((IS_NILLABLE(left)) || (IS_NILLABLE(right)))
 6743: 		entry->info |= XML_EXP_NILABLE;
 6744: 	    if ((entry->exp_left->c_max == -1) ||
 6745: 	        (entry->exp_right->c_max == -1))
 6746: 		entry->c_max = -1;
 6747: 	    else if (entry->exp_left->c_max > entry->exp_right->c_max)
 6748: 	        entry->c_max = entry->exp_left->c_max;
 6749: 	    else
 6750: 	        entry->c_max = entry->exp_right->c_max;
 6751: 	} else {
 6752: 	    if ((IS_NILLABLE(left)) && (IS_NILLABLE(right)))
 6753: 		entry->info |= XML_EXP_NILABLE;
 6754: 	    if ((entry->exp_left->c_max == -1) ||
 6755: 	        (entry->exp_right->c_max == -1))
 6756: 		entry->c_max = -1;
 6757: 	    else
 6758: 	        entry->c_max = entry->exp_left->c_max + entry->exp_right->c_max;
 6759: 	}
 6760:     }
 6761:     entry->ref = 1;
 6762:     if (ctxt->table[key] != NULL)
 6763:         entry->next = ctxt->table[key];
 6764: 
 6765:     ctxt->table[key] = entry;
 6766:     ctxt->nbElems++;
 6767: 
 6768:     return(entry);
 6769: }
 6770: 
 6771: /**
 6772:  * xmlExpFree:
 6773:  * @ctxt: the expression context
 6774:  * @exp: the expression
 6775:  *
 6776:  * Dereference the expression
 6777:  */
 6778: void
 6779: xmlExpFree(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp) {
 6780:     if ((exp == NULL) || (exp == forbiddenExp) || (exp == emptyExp))
 6781:         return;
 6782:     exp->ref--;
 6783:     if (exp->ref == 0) {
 6784:         unsigned short key;
 6785: 
 6786:         /* Unlink it first from the hash table */
 6787: 	key = exp->key % ctxt->size;
 6788: 	if (ctxt->table[key] == exp) {
 6789: 	    ctxt->table[key] = exp->next;
 6790: 	} else {
 6791: 	    xmlExpNodePtr tmp;
 6792: 
 6793: 	    tmp = ctxt->table[key];
 6794: 	    while (tmp != NULL) {
 6795: 	        if (tmp->next == exp) {
 6796: 		    tmp->next = exp->next;
 6797: 		    break;
 6798: 		}
 6799: 	        tmp = tmp->next;
 6800: 	    }
 6801: 	}
 6802: 
 6803:         if ((exp->type == XML_EXP_SEQ) || (exp->type == XML_EXP_OR)) {
 6804: 	    xmlExpFree(ctxt, exp->exp_left);
 6805: 	    xmlExpFree(ctxt, exp->exp_right);
 6806: 	} else if (exp->type == XML_EXP_COUNT) {
 6807: 	    xmlExpFree(ctxt, exp->exp_left);
 6808: 	}
 6809:         xmlFree(exp);
 6810: 	ctxt->nb_nodes--;
 6811:     }
 6812: }
 6813: 
 6814: /**
 6815:  * xmlExpRef:
 6816:  * @exp: the expression
 6817:  *
 6818:  * Increase the reference count of the expression
 6819:  */
 6820: void
 6821: xmlExpRef(xmlExpNodePtr exp) {
 6822:     if (exp != NULL)
 6823:         exp->ref++;
 6824: }
 6825: 
 6826: /**
 6827:  * xmlExpNewAtom:
 6828:  * @ctxt: the expression context
 6829:  * @name: the atom name
 6830:  * @len: the atom name lenght in byte (or -1);
 6831:  *
 6832:  * Get the atom associated to this name from that context
 6833:  *
 6834:  * Returns the node or NULL in case of error
 6835:  */
 6836: xmlExpNodePtr
 6837: xmlExpNewAtom(xmlExpCtxtPtr ctxt, const xmlChar *name, int len) {
 6838:     if ((ctxt == NULL) || (name == NULL))
 6839:         return(NULL);
 6840:     name = xmlDictLookup(ctxt->dict, name, len);
 6841:     if (name == NULL)
 6842:         return(NULL);
 6843:     return(xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, name, 0, 0));
 6844: }
 6845: 
 6846: /**
 6847:  * xmlExpNewOr:
 6848:  * @ctxt: the expression context
 6849:  * @left: left expression
 6850:  * @right: right expression
 6851:  *
 6852:  * Get the atom associated to the choice @left | @right
 6853:  * Note that @left and @right are consumed in the operation, to keep
 6854:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6855:  * this is true even in case of failure (unless ctxt == NULL).
 6856:  *
 6857:  * Returns the node or NULL in case of error
 6858:  */
 6859: xmlExpNodePtr
 6860: xmlExpNewOr(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6861:     if (ctxt == NULL)
 6862:         return(NULL);
 6863:     if ((left == NULL) || (right == NULL)) {
 6864:         xmlExpFree(ctxt, left);
 6865:         xmlExpFree(ctxt, right);
 6866:         return(NULL);
 6867:     }
 6868:     return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, left, right, NULL, 0, 0));
 6869: }
 6870: 
 6871: /**
 6872:  * xmlExpNewSeq:
 6873:  * @ctxt: the expression context
 6874:  * @left: left expression
 6875:  * @right: right expression
 6876:  *
 6877:  * Get the atom associated to the sequence @left , @right
 6878:  * Note that @left and @right are consumed in the operation, to keep
 6879:  * an handle on them use xmlExpRef() and use xmlExpFree() to release them,
 6880:  * this is true even in case of failure (unless ctxt == NULL).
 6881:  *
 6882:  * Returns the node or NULL in case of error
 6883:  */
 6884: xmlExpNodePtr
 6885: xmlExpNewSeq(xmlExpCtxtPtr ctxt, xmlExpNodePtr left, xmlExpNodePtr right) {
 6886:     if (ctxt == NULL)
 6887:         return(NULL);
 6888:     if ((left == NULL) || (right == NULL)) {
 6889:         xmlExpFree(ctxt, left);
 6890:         xmlExpFree(ctxt, right);
 6891:         return(NULL);
 6892:     }
 6893:     return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, left, right, NULL, 0, 0));
 6894: }
 6895: 
 6896: /**
 6897:  * xmlExpNewRange:
 6898:  * @ctxt: the expression context
 6899:  * @subset: the expression to be repeated
 6900:  * @min: the lower bound for the repetition
 6901:  * @max: the upper bound for the repetition, -1 means infinite
 6902:  *
 6903:  * Get the atom associated to the range (@subset){@min, @max}
 6904:  * Note that @subset is consumed in the operation, to keep
 6905:  * an handle on it use xmlExpRef() and use xmlExpFree() to release it,
 6906:  * this is true even in case of failure (unless ctxt == NULL).
 6907:  *
 6908:  * Returns the node or NULL in case of error
 6909:  */
 6910: xmlExpNodePtr
 6911: xmlExpNewRange(xmlExpCtxtPtr ctxt, xmlExpNodePtr subset, int min, int max) {
 6912:     if (ctxt == NULL)
 6913:         return(NULL);
 6914:     if ((subset == NULL) || (min < 0) || (max < -1) ||
 6915:         ((max >= 0) && (min > max))) {
 6916: 	xmlExpFree(ctxt, subset);
 6917:         return(NULL);
 6918:     }
 6919:     return(xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, subset,
 6920:                               NULL, NULL, min, max));
 6921: }
 6922: 
 6923: /************************************************************************
 6924:  *									*
 6925:  *		Public API for operations on expressions		*
 6926:  *									*
 6927:  ************************************************************************/
 6928: 
 6929: static int
 6930: xmlExpGetLanguageInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6931:                      const xmlChar**list, int len, int nb) {
 6932:     int tmp, tmp2;
 6933: tail:
 6934:     switch (exp->type) {
 6935:         case XML_EXP_EMPTY:
 6936: 	    return(0);
 6937:         case XML_EXP_ATOM:
 6938: 	    for (tmp = 0;tmp < nb;tmp++)
 6939: 	        if (list[tmp] == exp->exp_str)
 6940: 		    return(0);
 6941:             if (nb >= len)
 6942: 	        return(-2);
 6943: 	    list[nb] = exp->exp_str;
 6944: 	    return(1);
 6945:         case XML_EXP_COUNT:
 6946: 	    exp = exp->exp_left;
 6947: 	    goto tail;
 6948:         case XML_EXP_SEQ:
 6949:         case XML_EXP_OR:
 6950: 	    tmp = xmlExpGetLanguageInt(ctxt, exp->exp_left, list, len, nb);
 6951: 	    if (tmp < 0)
 6952: 	        return(tmp);
 6953: 	    tmp2 = xmlExpGetLanguageInt(ctxt, exp->exp_right, list, len,
 6954: 	                                nb + tmp);
 6955: 	    if (tmp2 < 0)
 6956: 	        return(tmp2);
 6957:             return(tmp + tmp2);
 6958:     }
 6959:     return(-1);
 6960: }
 6961: 
 6962: /**
 6963:  * xmlExpGetLanguage:
 6964:  * @ctxt: the expression context
 6965:  * @exp: the expression
 6966:  * @langList: where to store the tokens
 6967:  * @len: the allocated lenght of @list
 6968:  *
 6969:  * Find all the strings used in @exp and store them in @list
 6970:  *
 6971:  * Returns the number of unique strings found, -1 in case of errors and
 6972:  *         -2 if there is more than @len strings
 6973:  */
 6974: int
 6975: xmlExpGetLanguage(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6976:                   const xmlChar**langList, int len) {
 6977:     if ((ctxt == NULL) || (exp == NULL) || (langList == NULL) || (len <= 0))
 6978:         return(-1);
 6979:     return(xmlExpGetLanguageInt(ctxt, exp, langList, len, 0));
 6980: }
 6981: 
 6982: static int
 6983: xmlExpGetStartInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 6984:                   const xmlChar**list, int len, int nb) {
 6985:     int tmp, tmp2;
 6986: tail:
 6987:     switch (exp->type) {
 6988:         case XML_EXP_FORBID:
 6989: 	    return(0);
 6990:         case XML_EXP_EMPTY:
 6991: 	    return(0);
 6992:         case XML_EXP_ATOM:
 6993: 	    for (tmp = 0;tmp < nb;tmp++)
 6994: 	        if (list[tmp] == exp->exp_str)
 6995: 		    return(0);
 6996:             if (nb >= len)
 6997: 	        return(-2);
 6998: 	    list[nb] = exp->exp_str;
 6999: 	    return(1);
 7000:         case XML_EXP_COUNT:
 7001: 	    exp = exp->exp_left;
 7002: 	    goto tail;
 7003:         case XML_EXP_SEQ:
 7004: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7005: 	    if (tmp < 0)
 7006: 	        return(tmp);
 7007: 	    if (IS_NILLABLE(exp->exp_left)) {
 7008: 		tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7009: 					    nb + tmp);
 7010: 		if (tmp2 < 0)
 7011: 		    return(tmp2);
 7012: 		tmp += tmp2;
 7013: 	    }
 7014:             return(tmp);
 7015:         case XML_EXP_OR:
 7016: 	    tmp = xmlExpGetStartInt(ctxt, exp->exp_left, list, len, nb);
 7017: 	    if (tmp < 0)
 7018: 	        return(tmp);
 7019: 	    tmp2 = xmlExpGetStartInt(ctxt, exp->exp_right, list, len,
 7020: 	                                nb + tmp);
 7021: 	    if (tmp2 < 0)
 7022: 	        return(tmp2);
 7023:             return(tmp + tmp2);
 7024:     }
 7025:     return(-1);
 7026: }
 7027: 
 7028: /**
 7029:  * xmlExpGetStart:
 7030:  * @ctxt: the expression context
 7031:  * @exp: the expression
 7032:  * @tokList: where to store the tokens
 7033:  * @len: the allocated lenght of @list
 7034:  *
 7035:  * Find all the strings that appears at the start of the languages
 7036:  * accepted by @exp and store them in @list. E.g. for (a, b) | c
 7037:  * it will return the list [a, c]
 7038:  *
 7039:  * Returns the number of unique strings found, -1 in case of errors and
 7040:  *         -2 if there is more than @len strings
 7041:  */
 7042: int
 7043: xmlExpGetStart(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, 
 7044:                const xmlChar**tokList, int len) {
 7045:     if ((ctxt == NULL) || (exp == NULL) || (tokList == NULL) || (len <= 0))
 7046:         return(-1);
 7047:     return(xmlExpGetStartInt(ctxt, exp, tokList, len, 0));
 7048: }
 7049: 
 7050: /**
 7051:  * xmlExpIsNillable:
 7052:  * @exp: the expression
 7053:  *
 7054:  * Finds if the expression is nillable, i.e. if it accepts the empty sequqnce
 7055:  *
 7056:  * Returns 1 if nillable, 0 if not and -1 in case of error
 7057:  */
 7058: int
 7059: xmlExpIsNillable(xmlExpNodePtr exp) {
 7060:     if (exp == NULL)
 7061:         return(-1);
 7062:     return(IS_NILLABLE(exp) != 0);
 7063: }
 7064: 
 7065: static xmlExpNodePtr
 7066: xmlExpStringDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, const xmlChar *str)
 7067: {
 7068:     xmlExpNodePtr ret;
 7069: 
 7070:     switch (exp->type) {
 7071: 	case XML_EXP_EMPTY:
 7072: 	    return(forbiddenExp);
 7073: 	case XML_EXP_FORBID:
 7074: 	    return(forbiddenExp);
 7075: 	case XML_EXP_ATOM:
 7076: 	    if (exp->exp_str == str) {
 7077: #ifdef DEBUG_DERIV
 7078: 		printf("deriv atom: equal => Empty\n");
 7079: #endif
 7080: 	        ret = emptyExp;
 7081: 	    } else {
 7082: #ifdef DEBUG_DERIV
 7083: 		printf("deriv atom: mismatch => forbid\n");
 7084: #endif
 7085: 	        /* TODO wildcards here */
 7086: 		ret = forbiddenExp;
 7087: 	    }
 7088: 	    return(ret);
 7089: 	case XML_EXP_OR: {
 7090: 	    xmlExpNodePtr tmp;
 7091: 
 7092: #ifdef DEBUG_DERIV
 7093: 	    printf("deriv or: => or(derivs)\n");
 7094: #endif
 7095: 	    tmp = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7096: 	    if (tmp == NULL) {
 7097: 		return(NULL);
 7098: 	    }
 7099: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7100: 	    if (ret == NULL) {
 7101: 	        xmlExpFree(ctxt, tmp);
 7102: 		return(NULL);
 7103: 	    }
 7104:             ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret,
 7105: 			     NULL, 0, 0);
 7106: 	    return(ret);
 7107: 	}
 7108: 	case XML_EXP_SEQ:
 7109: #ifdef DEBUG_DERIV
 7110: 	    printf("deriv seq: starting with left\n");
 7111: #endif
 7112: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7113: 	    if (ret == NULL) {
 7114: 	        return(NULL);
 7115: 	    } else if (ret == forbiddenExp) {
 7116: 	        if (IS_NILLABLE(exp->exp_left)) {
 7117: #ifdef DEBUG_DERIV
 7118: 		    printf("deriv seq: left failed but nillable\n");
 7119: #endif
 7120: 		    ret = xmlExpStringDeriveInt(ctxt, exp->exp_right, str);
 7121: 		}
 7122: 	    } else {
 7123: #ifdef DEBUG_DERIV
 7124: 		printf("deriv seq: left match => sequence\n");
 7125: #endif
 7126: 	        exp->exp_right->ref++;
 7127: 	        ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, exp->exp_right,
 7128: 		                         NULL, 0, 0);
 7129: 	    }
 7130: 	    return(ret);
 7131: 	case XML_EXP_COUNT: {
 7132: 	    int min, max;
 7133: 	    xmlExpNodePtr tmp;
 7134: 
 7135: 	    if (exp->exp_max == 0)
 7136: 		return(forbiddenExp);
 7137: 	    ret = xmlExpStringDeriveInt(ctxt, exp->exp_left, str);
 7138: 	    if (ret == NULL)
 7139: 	        return(NULL);
 7140: 	    if (ret == forbiddenExp) {
 7141: #ifdef DEBUG_DERIV
 7142: 		printf("deriv count: pattern mismatch => forbid\n");
 7143: #endif
 7144: 	        return(ret);
 7145: 	    }
 7146: 	    if (exp->exp_max == 1)
 7147: 		return(ret);
 7148: 	    if (exp->exp_max < 0) /* unbounded */
 7149: 		max = -1;
 7150: 	    else
 7151: 		max = exp->exp_max - 1;
 7152: 	    if (exp->exp_min > 0)
 7153: 		min = exp->exp_min - 1;
 7154: 	    else
 7155: 		min = 0;
 7156: 	    exp->exp_left->ref++;
 7157: 	    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left, NULL,
 7158: 				     NULL, min, max);
 7159: 	    if (ret == emptyExp) {
 7160: #ifdef DEBUG_DERIV
 7161: 		printf("deriv count: match to empty => new count\n");
 7162: #endif
 7163: 	        return(tmp);
 7164: 	    }
 7165: #ifdef DEBUG_DERIV
 7166: 	    printf("deriv count: match => sequence with new count\n");
 7167: #endif
 7168: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, tmp,
 7169: 	                              NULL, 0, 0));
 7170: 	}
 7171:     }
 7172:     return(NULL);
 7173: }
 7174: 
 7175: /**
 7176:  * xmlExpStringDerive:
 7177:  * @ctxt: the expression context
 7178:  * @exp: the expression
 7179:  * @str: the string
 7180:  * @len: the string len in bytes if available
 7181:  *
 7182:  * Do one step of Brzozowski derivation of the expression @exp with
 7183:  * respect to the input string
 7184:  *
 7185:  * Returns the resulting expression or NULL in case of internal error
 7186:  */
 7187: xmlExpNodePtr
 7188: xmlExpStringDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7189:                    const xmlChar *str, int len) {
 7190:     const xmlChar *input;
 7191: 
 7192:     if ((exp == NULL) || (ctxt == NULL) || (str == NULL)) {
 7193:         return(NULL);
 7194:     }
 7195:     /*
 7196:      * check the string is in the dictionnary, if yes use an interned
 7197:      * copy, otherwise we know it's not an acceptable input
 7198:      */
 7199:     input = xmlDictExists(ctxt->dict, str, len);
 7200:     if (input == NULL) {
 7201:         return(forbiddenExp);
 7202:     }
 7203:     return(xmlExpStringDeriveInt(ctxt, exp, input));
 7204: }
 7205: 
 7206: static int
 7207: xmlExpCheckCard(xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7208:     int ret = 1;
 7209: 
 7210:     if (sub->c_max == -1) {
 7211:         if (exp->c_max != -1)
 7212: 	    ret = 0;
 7213:     } else if ((exp->c_max >= 0) && (exp->c_max < sub->c_max)) {
 7214:         ret = 0;
 7215:     }
 7216: #if 0
 7217:     if ((IS_NILLABLE(sub)) && (!IS_NILLABLE(exp)))
 7218:         ret = 0;
 7219: #endif
 7220:     return(ret);
 7221: }
 7222: 
 7223: static xmlExpNodePtr xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp,
 7224:                                         xmlExpNodePtr sub);
 7225: /**
 7226:  * xmlExpDivide:
 7227:  * @ctxt: the expressions context
 7228:  * @exp: the englobing expression
 7229:  * @sub: the subexpression
 7230:  * @mult: the multiple expression
 7231:  * @remain: the remain from the derivation of the multiple
 7232:  *
 7233:  * Check if exp is a multiple of sub, i.e. if there is a finite number n
 7234:  * so that sub{n} subsume exp
 7235:  *
 7236:  * Returns the multiple value if successful, 0 if it is not a multiple
 7237:  *         and -1 in case of internel error.
 7238:  */
 7239: 
 7240: static int
 7241: xmlExpDivide(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub,
 7242:              xmlExpNodePtr *mult, xmlExpNodePtr *remain) {
 7243:     int i;
 7244:     xmlExpNodePtr tmp, tmp2;
 7245: 
 7246:     if (mult != NULL) *mult = NULL;
 7247:     if (remain != NULL) *remain = NULL;
 7248:     if (exp->c_max == -1) return(0);
 7249:     if (IS_NILLABLE(exp) && (!IS_NILLABLE(sub))) return(0);
 7250: 
 7251:     for (i = 1;i <= exp->c_max;i++) {
 7252:         sub->ref++;
 7253:         tmp = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7254: 				 sub, NULL, NULL, i, i);
 7255: 	if (tmp == NULL) {
 7256: 	    return(-1);
 7257: 	}
 7258: 	if (!xmlExpCheckCard(tmp, exp)) {
 7259: 	    xmlExpFree(ctxt, tmp);
 7260: 	    continue;
 7261: 	}
 7262: 	tmp2 = xmlExpExpDeriveInt(ctxt, tmp, exp);
 7263: 	if (tmp2 == NULL) {
 7264: 	    xmlExpFree(ctxt, tmp);
 7265: 	    return(-1);
 7266: 	}
 7267: 	if ((tmp2 != forbiddenExp) && (IS_NILLABLE(tmp2))) {
 7268: 	    if (remain != NULL)
 7269: 	        *remain = tmp2;
 7270: 	    else
 7271: 	        xmlExpFree(ctxt, tmp2);
 7272: 	    if (mult != NULL)
 7273: 	        *mult = tmp;
 7274: 	    else
 7275: 	        xmlExpFree(ctxt, tmp);
 7276: #ifdef DEBUG_DERIV
 7277: 	    printf("Divide succeeded %d\n", i);
 7278: #endif
 7279: 	    return(i);
 7280: 	}
 7281: 	xmlExpFree(ctxt, tmp);
 7282: 	xmlExpFree(ctxt, tmp2);
 7283:     }
 7284: #ifdef DEBUG_DERIV
 7285:     printf("Divide failed\n");
 7286: #endif
 7287:     return(0);
 7288: }
 7289: 
 7290: /**
 7291:  * xmlExpExpDeriveInt:
 7292:  * @ctxt: the expressions context
 7293:  * @exp: the englobing expression
 7294:  * @sub: the subexpression
 7295:  *
 7296:  * Try to do a step of Brzozowski derivation but at a higher level
 7297:  * the input being a subexpression.
 7298:  *
 7299:  * Returns the resulting expression or NULL in case of internal error
 7300:  */
 7301: static xmlExpNodePtr
 7302: xmlExpExpDeriveInt(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7303:     xmlExpNodePtr ret, tmp, tmp2, tmp3;
 7304:     const xmlChar **tab;
 7305:     int len, i;
 7306: 
 7307:     /*
 7308:      * In case of equality and if the expression can only consume a finite
 7309:      * amount, then the derivation is empty
 7310:      */
 7311:     if ((exp == sub) && (exp->c_max >= 0)) {
 7312: #ifdef DEBUG_DERIV
 7313:         printf("Equal(exp, sub) and finite -> Empty\n");
 7314: #endif
 7315:         return(emptyExp);
 7316:     }
 7317:     /*
 7318:      * decompose sub sequence first
 7319:      */
 7320:     if (sub->type == XML_EXP_EMPTY) {
 7321: #ifdef DEBUG_DERIV
 7322:         printf("Empty(sub) -> Empty\n");
 7323: #endif
 7324: 	exp->ref++;
 7325:         return(exp);
 7326:     }
 7327:     if (sub->type == XML_EXP_SEQ) {
 7328: #ifdef DEBUG_DERIV
 7329:         printf("Seq(sub) -> decompose\n");
 7330: #endif
 7331:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7332: 	if (tmp == NULL)
 7333: 	    return(NULL);
 7334: 	if (tmp == forbiddenExp)
 7335: 	    return(tmp);
 7336: 	ret = xmlExpExpDeriveInt(ctxt, tmp, sub->exp_right);
 7337: 	xmlExpFree(ctxt, tmp);
 7338: 	return(ret);
 7339:     }
 7340:     if (sub->type == XML_EXP_OR) {
 7341: #ifdef DEBUG_DERIV
 7342:         printf("Or(sub) -> decompose\n");
 7343: #endif
 7344:         tmp = xmlExpExpDeriveInt(ctxt, exp, sub->exp_left);
 7345: 	if (tmp == forbiddenExp)
 7346: 	    return(tmp);
 7347: 	if (tmp == NULL)
 7348: 	    return(NULL);
 7349: 	ret = xmlExpExpDeriveInt(ctxt, exp, sub->exp_right);
 7350: 	if ((ret == NULL) || (ret == forbiddenExp)) {
 7351: 	    xmlExpFree(ctxt, tmp);
 7352: 	    return(ret);
 7353: 	}
 7354: 	return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, tmp, ret, NULL, 0, 0));
 7355:     }
 7356:     if (!xmlExpCheckCard(exp, sub)) {
 7357: #ifdef DEBUG_DERIV
 7358:         printf("CheckCard(exp, sub) failed -> Forbid\n");
 7359: #endif
 7360:         return(forbiddenExp);
 7361:     }
 7362:     switch (exp->type) {
 7363:         case XML_EXP_EMPTY:
 7364: 	    if (sub == emptyExp)
 7365: 	        return(emptyExp);
 7366: #ifdef DEBUG_DERIV
 7367: 	    printf("Empty(exp) -> Forbid\n");
 7368: #endif
 7369: 	    return(forbiddenExp);
 7370:         case XML_EXP_FORBID:
 7371: #ifdef DEBUG_DERIV
 7372: 	    printf("Forbid(exp) -> Forbid\n");
 7373: #endif
 7374: 	    return(forbiddenExp);
 7375:         case XML_EXP_ATOM:
 7376: 	    if (sub->type == XML_EXP_ATOM) {
 7377: 	        /* TODO: handle wildcards */
 7378: 	        if (exp->exp_str == sub->exp_str) {
 7379: #ifdef DEBUG_DERIV
 7380: 		    printf("Atom match -> Empty\n");
 7381: #endif
 7382: 		    return(emptyExp);
 7383:                 }
 7384: #ifdef DEBUG_DERIV
 7385: 		printf("Atom mismatch -> Forbid\n");
 7386: #endif
 7387: 	        return(forbiddenExp);
 7388: 	    }
 7389: 	    if ((sub->type == XML_EXP_COUNT) &&
 7390: 	        (sub->exp_max == 1) &&
 7391: 	        (sub->exp_left->type == XML_EXP_ATOM)) {
 7392: 	        /* TODO: handle wildcards */
 7393: 	        if (exp->exp_str == sub->exp_left->exp_str) {
 7394: #ifdef DEBUG_DERIV
 7395: 		    printf("Atom match -> Empty\n");
 7396: #endif
 7397: 		    return(emptyExp);
 7398: 		}
 7399: #ifdef DEBUG_DERIV
 7400: 		printf("Atom mismatch -> Forbid\n");
 7401: #endif
 7402: 	        return(forbiddenExp);
 7403: 	    }
 7404: #ifdef DEBUG_DERIV
 7405: 	    printf("Compex exp vs Atom -> Forbid\n");
 7406: #endif
 7407: 	    return(forbiddenExp);
 7408:         case XML_EXP_SEQ:
 7409: 	    /* try to get the sequence consumed only if possible */
 7410: 	    if (xmlExpCheckCard(exp->exp_left, sub)) {
 7411: 		/* See if the sequence can be consumed directly */
 7412: #ifdef DEBUG_DERIV
 7413: 		printf("Seq trying left only\n");
 7414: #endif
 7415: 		ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7416: 		if ((ret != forbiddenExp) && (ret != NULL)) {
 7417: #ifdef DEBUG_DERIV
 7418: 		    printf("Seq trying left only worked\n");
 7419: #endif
 7420: 		    /*
 7421: 		     * TODO: assumption here that we are determinist
 7422: 		     *       i.e. we won't get to a nillable exp left
 7423: 		     *       subset which could be matched by the right
 7424: 		     *       part too.
 7425: 		     * e.g.: (a | b)+,(a | c) and 'a+,a'
 7426: 		     */
 7427: 		    exp->exp_right->ref++;
 7428: 		    return(xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7429: 					      exp->exp_right, NULL, 0, 0));
 7430: 		}
 7431: #ifdef DEBUG_DERIV
 7432: 	    } else {
 7433: 		printf("Seq: left too short\n");
 7434: #endif
 7435: 	    }
 7436: 	    /* Try instead to decompose */
 7437: 	    if (sub->type == XML_EXP_COUNT) {
 7438: 		int min, max;
 7439: 
 7440: #ifdef DEBUG_DERIV
 7441: 		printf("Seq: sub is a count\n");
 7442: #endif
 7443: 	        ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7444: 		if (ret == NULL)
 7445: 		    return(NULL);
 7446: 		if (ret != forbiddenExp) {
 7447: #ifdef DEBUG_DERIV
 7448: 		    printf("Seq , Count match on left\n");
 7449: #endif
 7450: 		    if (sub->exp_max < 0)
 7451: 		        max = -1;
 7452: 	            else
 7453: 		        max = sub->exp_max -1;
 7454: 		    if (sub->exp_min > 0)
 7455: 		        min = sub->exp_min -1;
 7456: 		    else
 7457: 		        min = 0;
 7458: 		    exp->exp_right->ref++;
 7459: 		    tmp = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret,
 7460: 		                             exp->exp_right, NULL, 0, 0);
 7461: 		    if (tmp == NULL)
 7462: 		        return(NULL);
 7463: 
 7464: 		    sub->exp_left->ref++;
 7465: 		    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT,
 7466: 				      sub->exp_left, NULL, NULL, min, max);
 7467: 		    if (tmp2 == NULL) {
 7468: 		        xmlExpFree(ctxt, tmp);
 7469: 			return(NULL);
 7470: 		    }
 7471: 		    ret = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7472: 		    xmlExpFree(ctxt, tmp);
 7473: 		    xmlExpFree(ctxt, tmp2);
 7474: 		    return(ret);
 7475: 		}
 7476: 	    }
 7477: 	    /* we made no progress on structured operations */
 7478: 	    break;
 7479:         case XML_EXP_OR:
 7480: #ifdef DEBUG_DERIV
 7481: 	    printf("Or , trying both side\n");
 7482: #endif
 7483: 	    ret = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7484: 	    if (ret == NULL)
 7485: 	        return(NULL);
 7486: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_right, sub);
 7487: 	    if (tmp == NULL) {
 7488: 		xmlExpFree(ctxt, ret);
 7489: 	        return(NULL);
 7490: 	    }
 7491: 	    return(xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp, NULL, 0, 0));
 7492:         case XML_EXP_COUNT: {
 7493: 	    int min, max;
 7494: 
 7495: 	    if (sub->type == XML_EXP_COUNT) {
 7496: 	        /*
 7497: 		 * Try to see if the loop is completely subsumed
 7498: 		 */
 7499: 	        tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub->exp_left);
 7500: 		if (tmp == NULL)
 7501: 		    return(NULL);
 7502: 		if (tmp == forbiddenExp) {
 7503: 		    int mult;
 7504: 
 7505: #ifdef DEBUG_DERIV
 7506: 		    printf("Count, Count inner don't subsume\n");
 7507: #endif
 7508: 		    mult = xmlExpDivide(ctxt, sub->exp_left, exp->exp_left,
 7509: 		                        NULL, &tmp);
 7510: 		    if (mult <= 0) {
 7511: #ifdef DEBUG_DERIV
 7512: 			printf("Count, Count not multiple => forbidden\n");
 7513: #endif
 7514:                         return(forbiddenExp);
 7515: 		    }
 7516: 		    if (sub->exp_max == -1) {
 7517: 		        max = -1;
 7518: 			if (exp->exp_max == -1) {
 7519: 			    if (exp->exp_min <= sub->exp_min * mult)
 7520: 			        min = 0;
 7521: 			    else
 7522: 			        min = exp->exp_min - sub->exp_min * mult;
 7523: 			} else {
 7524: #ifdef DEBUG_DERIV
 7525: 			    printf("Count, Count finite can't subsume infinite\n");
 7526: #endif
 7527:                             xmlExpFree(ctxt, tmp);
 7528: 			    return(forbiddenExp);
 7529: 			}
 7530: 		    } else {
 7531: 			if (exp->exp_max == -1) {
 7532: #ifdef DEBUG_DERIV
 7533: 			    printf("Infinite loop consume mult finite loop\n");
 7534: #endif
 7535: 			    if (exp->exp_min > sub->exp_min * mult) {
 7536: 				max = -1;
 7537: 				min = exp->exp_min - sub->exp_min * mult;
 7538: 			    } else {
 7539: 				max = -1;
 7540: 				min = 0;
 7541: 			    }
 7542: 			} else {
 7543: 			    if (exp->exp_max < sub->exp_max * mult) {
 7544: #ifdef DEBUG_DERIV
 7545: 				printf("loops max mult mismatch => forbidden\n");
 7546: #endif
 7547: 				xmlExpFree(ctxt, tmp);
 7548: 				return(forbiddenExp);
 7549: 			    }
 7550: 			    if (sub->exp_max * mult > exp->exp_min)
 7551: 				min = 0;
 7552: 			    else
 7553: 				min = exp->exp_min - sub->exp_max * mult;
 7554: 			    max = exp->exp_max - sub->exp_max * mult;
 7555: 			}
 7556: 		    }
 7557: 		} else if (!IS_NILLABLE(tmp)) {
 7558: 		    /*
 7559: 		     * TODO: loop here to try to grow if working on finite
 7560: 		     *       blocks.
 7561: 		     */
 7562: #ifdef DEBUG_DERIV
 7563: 		    printf("Count, Count remain not nillable => forbidden\n");
 7564: #endif
 7565: 		    xmlExpFree(ctxt, tmp);
 7566: 		    return(forbiddenExp);
 7567: 		} else if (sub->exp_max == -1) {
 7568: 		    if (exp->exp_max == -1) {
 7569: 		        if (exp->exp_min <= sub->exp_min) {
 7570: #ifdef DEBUG_DERIV
 7571: 			    printf("Infinite loops Okay => COUNT(0,Inf)\n");
 7572: #endif
 7573:                             max = -1;
 7574: 			    min = 0;
 7575: 			} else {
 7576: #ifdef DEBUG_DERIV
 7577: 			    printf("Infinite loops min => Count(X,Inf)\n");
 7578: #endif
 7579:                             max = -1;
 7580: 			    min = exp->exp_min - sub->exp_min;
 7581: 			}
 7582: 		    } else if (exp->exp_min > sub->exp_min) {
 7583: #ifdef DEBUG_DERIV
 7584: 			printf("loops min mismatch 1 => forbidden ???\n");
 7585: #endif
 7586: 		        xmlExpFree(ctxt, tmp);
 7587: 		        return(forbiddenExp);
 7588: 		    } else {
 7589: 			max = -1;
 7590: 			min = 0;
 7591: 		    }
 7592: 		} else {
 7593: 		    if (exp->exp_max == -1) {
 7594: #ifdef DEBUG_DERIV
 7595: 			printf("Infinite loop consume finite loop\n");
 7596: #endif
 7597: 		        if (exp->exp_min > sub->exp_min) {
 7598: 			    max = -1;
 7599: 			    min = exp->exp_min - sub->exp_min;
 7600: 			} else {
 7601: 			    max = -1;
 7602: 			    min = 0;
 7603: 			}
 7604: 		    } else {
 7605: 		        if (exp->exp_max < sub->exp_max) {
 7606: #ifdef DEBUG_DERIV
 7607: 			    printf("loops max mismatch => forbidden\n");
 7608: #endif
 7609: 			    xmlExpFree(ctxt, tmp);
 7610: 			    return(forbiddenExp);
 7611: 			}
 7612: 			if (sub->exp_max > exp->exp_min)
 7613: 			    min = 0;
 7614: 			else
 7615: 			    min = exp->exp_min - sub->exp_max;
 7616: 			max = exp->exp_max - sub->exp_max;
 7617: 		    }
 7618: 		}
 7619: #ifdef DEBUG_DERIV
 7620: 		printf("loops match => SEQ(COUNT())\n");
 7621: #endif
 7622: 		exp->exp_left->ref++;
 7623: 		tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7624: 		                          NULL, NULL, min, max);
 7625: 		if (tmp2 == NULL) {
 7626: 		    return(NULL);
 7627: 		}
 7628:                 ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7629: 		                         NULL, 0, 0);
 7630: 		return(ret);
 7631: 	    }
 7632: 	    tmp = xmlExpExpDeriveInt(ctxt, exp->exp_left, sub);
 7633: 	    if (tmp == NULL)
 7634: 		return(NULL);
 7635: 	    if (tmp == forbiddenExp) {
 7636: #ifdef DEBUG_DERIV
 7637: 		printf("loop mismatch => forbidden\n");
 7638: #endif
 7639: 		return(forbiddenExp);
 7640: 	    }
 7641: 	    if (exp->exp_min > 0)
 7642: 		min = exp->exp_min - 1;
 7643: 	    else
 7644: 		min = 0;
 7645: 	    if (exp->exp_max < 0)
 7646: 		max = -1;
 7647: 	    else
 7648: 		max = exp->exp_max - 1;
 7649: 
 7650: #ifdef DEBUG_DERIV
 7651: 	    printf("loop match => SEQ(COUNT())\n");
 7652: #endif
 7653: 	    exp->exp_left->ref++;
 7654: 	    tmp2 = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, exp->exp_left,
 7655: 				      NULL, NULL, min, max);
 7656: 	    if (tmp2 == NULL)
 7657: 		return(NULL);
 7658: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, tmp, tmp2,
 7659: 				     NULL, 0, 0);
 7660: 	    return(ret);
 7661: 	}
 7662:     }
 7663: 
 7664: #ifdef DEBUG_DERIV
 7665:     printf("Fallback to derivative\n");
 7666: #endif
 7667:     if (IS_NILLABLE(sub)) {
 7668:         if (!(IS_NILLABLE(exp)))
 7669: 	    return(forbiddenExp);
 7670: 	else
 7671: 	    ret = emptyExp;
 7672:     } else
 7673: 	ret = NULL;
 7674:     /*
 7675:      * here the structured derivation made no progress so
 7676:      * we use the default token based derivation to force one more step
 7677:      */
 7678:     if (ctxt->tabSize == 0)
 7679:         ctxt->tabSize = 40;
 7680: 
 7681:     tab = (const xmlChar **) xmlMalloc(ctxt->tabSize *
 7682: 	                               sizeof(const xmlChar *));
 7683:     if (tab == NULL) {
 7684: 	return(NULL);
 7685:     }
 7686: 
 7687:     /*
 7688:      * collect all the strings accepted by the subexpression on input
 7689:      */
 7690:     len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7691:     while (len < 0) {
 7692:         const xmlChar **temp;
 7693: 	temp = (const xmlChar **) xmlRealloc((xmlChar **) tab, ctxt->tabSize * 2 *
 7694: 	                                     sizeof(const xmlChar *));
 7695: 	if (temp == NULL) {
 7696: 	    xmlFree((xmlChar **) tab);
 7697: 	    return(NULL);
 7698: 	}
 7699: 	tab = temp;
 7700: 	ctxt->tabSize *= 2;
 7701: 	len = xmlExpGetStartInt(ctxt, sub, tab, ctxt->tabSize, 0);
 7702:     }
 7703:     for (i = 0;i < len;i++) {
 7704:         tmp = xmlExpStringDeriveInt(ctxt, exp, tab[i]);
 7705: 	if ((tmp == NULL) || (tmp == forbiddenExp)) {
 7706: 	    xmlExpFree(ctxt, ret);
 7707: 	    xmlFree((xmlChar **) tab);
 7708: 	    return(tmp);
 7709: 	}
 7710: 	tmp2 = xmlExpStringDeriveInt(ctxt, sub, tab[i]);
 7711: 	if ((tmp2 == NULL) || (tmp2 == forbiddenExp)) {
 7712: 	    xmlExpFree(ctxt, tmp);
 7713: 	    xmlExpFree(ctxt, ret);
 7714: 	    xmlFree((xmlChar **) tab);
 7715: 	    return(tmp);
 7716: 	}
 7717: 	tmp3 = xmlExpExpDeriveInt(ctxt, tmp, tmp2);
 7718: 	xmlExpFree(ctxt, tmp);
 7719: 	xmlExpFree(ctxt, tmp2);
 7720: 
 7721: 	if ((tmp3 == NULL) || (tmp3 == forbiddenExp)) {
 7722: 	    xmlExpFree(ctxt, ret);
 7723: 	    xmlFree((xmlChar **) tab);
 7724: 	    return(tmp3);
 7725: 	}
 7726: 
 7727: 	if (ret == NULL)
 7728: 	    ret = tmp3;
 7729: 	else {
 7730: 	    ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, tmp3, NULL, 0, 0);
 7731: 	    if (ret == NULL) {
 7732: 		xmlFree((xmlChar **) tab);
 7733: 	        return(NULL);
 7734: 	    }
 7735: 	}
 7736:     }
 7737:     xmlFree((xmlChar **) tab);
 7738:     return(ret);
 7739: }
 7740:     
 7741: /**
 7742:  * xmlExpExpDerive:
 7743:  * @ctxt: the expressions context
 7744:  * @exp: the englobing expression
 7745:  * @sub: the subexpression
 7746:  *
 7747:  * Evaluates the expression resulting from @exp consuming a sub expression @sub
 7748:  * Based on algebraic derivation and sometimes direct Brzozowski derivation
 7749:  * it usually tatkes less than linear time and can handle expressions generating
 7750:  * infinite languages.
 7751:  *
 7752:  * Returns the resulting expression or NULL in case of internal error, the
 7753:  *         result must be freed
 7754:  */
 7755: xmlExpNodePtr
 7756: xmlExpExpDerive(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7757:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7758:         return(NULL);
 7759: 
 7760:     /*
 7761:      * O(1) speedups
 7762:      */
 7763:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7764: #ifdef DEBUG_DERIV
 7765: 	printf("Sub nillable and not exp : can't subsume\n");
 7766: #endif
 7767:         return(forbiddenExp);
 7768:     }
 7769:     if (xmlExpCheckCard(exp, sub) == 0) {
 7770: #ifdef DEBUG_DERIV
 7771: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7772: #endif
 7773:         return(forbiddenExp);
 7774:     }
 7775:     return(xmlExpExpDeriveInt(ctxt, exp, sub));
 7776: }
 7777: 
 7778: /**
 7779:  * xmlExpSubsume:
 7780:  * @ctxt: the expressions context
 7781:  * @exp: the englobing expression
 7782:  * @sub: the subexpression
 7783:  *
 7784:  * Check whether @exp accepts all the languages accexpted by @sub
 7785:  * the input being a subexpression.
 7786:  *
 7787:  * Returns 1 if true 0 if false and -1 in case of failure.
 7788:  */
 7789: int
 7790: xmlExpSubsume(xmlExpCtxtPtr ctxt, xmlExpNodePtr exp, xmlExpNodePtr sub) {
 7791:     xmlExpNodePtr tmp;
 7792:     
 7793:     if ((exp == NULL) || (ctxt == NULL) || (sub == NULL))
 7794:         return(-1);
 7795: 
 7796:     /*
 7797:      * TODO: speedup by checking the language of sub is a subset of the
 7798:      *       language of exp
 7799:      */
 7800:     /*
 7801:      * O(1) speedups
 7802:      */
 7803:     if (IS_NILLABLE(sub) && (!IS_NILLABLE(exp))) {
 7804: #ifdef DEBUG_DERIV
 7805: 	printf("Sub nillable and not exp : can't subsume\n");
 7806: #endif
 7807:         return(0);
 7808:     }
 7809:     if (xmlExpCheckCard(exp, sub) == 0) {
 7810: #ifdef DEBUG_DERIV
 7811: 	printf("sub generate longuer sequances than exp : can't subsume\n");
 7812: #endif
 7813:         return(0);
 7814:     }
 7815:     tmp = xmlExpExpDeriveInt(ctxt, exp, sub);
 7816: #ifdef DEBUG_DERIV
 7817:     printf("Result derivation :\n");
 7818:     PRINT_EXP(tmp);
 7819: #endif
 7820:     if (tmp == NULL)
 7821:         return(-1);
 7822:     if (tmp == forbiddenExp)
 7823: 	return(0);
 7824:     if (tmp == emptyExp)
 7825: 	return(1);
 7826:     if ((tmp != NULL) && (IS_NILLABLE(tmp))) {
 7827:         xmlExpFree(ctxt, tmp);
 7828:         return(1);
 7829:     }
 7830:     xmlExpFree(ctxt, tmp);
 7831:     return(0);
 7832: }
 7833: 
 7834: /************************************************************************
 7835:  *									*
 7836:  *			Parsing expression 				*
 7837:  *									*
 7838:  ************************************************************************/
 7839: 
/*
 * Forward declaration: xmlExpParseOr() calls xmlExpParseExpr() to parse
 * the content of a parenthesised group, before its definition is seen.
 */
static xmlExpNodePtr xmlExpParseExpr(xmlExpCtxtPtr ctxt);

/*
 * Cursor helpers used by the expression parser below. They all operate
 * on a variable named ctxt which must be in scope at the point of use.
 * CUR, NEXT and IS_BLANK are first undefined so that any earlier
 * definition is replaced.
 */
/* CUR: the character under the cursor */
#undef CUR
#define CUR (*ctxt->cur)
/*
 * NEXT: advance the cursor by one character. The expansion already ends
 * with ';' so it can be written without a trailing semicolon.
 */
#undef NEXT
#define NEXT ctxt->cur++;
/* IS_BLANK: true for space, newline, carriage return and tab */
#undef IS_BLANK
#define IS_BLANK(c) ((c == ' ') || (c == '\n') || (c == '\r') || (c == '\t'))
/*
 * SKIP_BLANKS: advance the cursor past any run of blank characters. Like
 * NEXT the expansion is a complete statement.
 */
#define SKIP_BLANKS while (IS_BLANK(*ctxt->cur)) ctxt->cur++;
 7849: 
 7850: static int
 7851: xmlExpParseNumber(xmlExpCtxtPtr ctxt) {
 7852:     int ret = 0;
 7853: 
 7854:     SKIP_BLANKS
 7855:     if (CUR == '*') {
 7856: 	NEXT
 7857: 	return(-1);
 7858:     }
 7859:     if ((CUR < '0') || (CUR > '9'))
 7860:         return(-1);
 7861:     while ((CUR >= '0') && (CUR <= '9')) {
 7862:         ret = ret * 10 + (CUR - '0');
 7863: 	NEXT
 7864:     }
 7865:     return(ret);
 7866: }
 7867: 
 7868: static xmlExpNodePtr
 7869: xmlExpParseOr(xmlExpCtxtPtr ctxt) {
 7870:     const char *base;
 7871:     xmlExpNodePtr ret;
 7872:     const xmlChar *val;
 7873: 
 7874:     SKIP_BLANKS
 7875:     base = ctxt->cur;
 7876:     if (*ctxt->cur == '(') {
 7877:         NEXT
 7878: 	ret = xmlExpParseExpr(ctxt);
 7879: 	SKIP_BLANKS
 7880: 	if (*ctxt->cur != ')') {
 7881: 	    fprintf(stderr, "unbalanced '(' : %s\n", base);
 7882: 	    xmlExpFree(ctxt, ret);
 7883: 	    return(NULL);
 7884: 	}
 7885: 	NEXT;
 7886: 	SKIP_BLANKS
 7887: 	goto parse_quantifier;
 7888:     }
 7889:     while ((CUR != 0) && (!(IS_BLANK(CUR))) && (CUR != '(') &&
 7890:            (CUR != ')') && (CUR != '|') && (CUR != ',') && (CUR != '{') &&
 7891: 	   (CUR != '*') && (CUR != '+') && (CUR != '?') && (CUR != '}'))
 7892: 	NEXT;
 7893:     val = xmlDictLookup(ctxt->dict, BAD_CAST base, ctxt->cur - base);
 7894:     if (val == NULL)
 7895:         return(NULL);
 7896:     ret = xmlExpHashGetEntry(ctxt, XML_EXP_ATOM, NULL, NULL, val, 0, 0);
 7897:     if (ret == NULL)
 7898:         return(NULL);
 7899:     SKIP_BLANKS
 7900: parse_quantifier:
 7901:     if (CUR == '{') {
 7902:         int min, max;
 7903: 
 7904:         NEXT
 7905: 	min = xmlExpParseNumber(ctxt);
 7906: 	if (min < 0) {
 7907: 	    xmlExpFree(ctxt, ret);
 7908: 	    return(NULL);
 7909: 	}
 7910: 	SKIP_BLANKS
 7911: 	if (CUR == ',') {
 7912: 	    NEXT
 7913: 	    max = xmlExpParseNumber(ctxt);
 7914: 	    SKIP_BLANKS
 7915: 	} else
 7916: 	    max = min;
 7917: 	if (CUR != '}') {
 7918: 	    xmlExpFree(ctxt, ret);
 7919: 	    return(NULL);
 7920: 	}
 7921:         NEXT
 7922: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7923: 	                         min, max);
 7924: 	SKIP_BLANKS
 7925:     } else if (CUR == '?') {
 7926:         NEXT
 7927: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7928: 	                         0, 1);
 7929: 	SKIP_BLANKS
 7930:     } else if (CUR == '+') {
 7931:         NEXT
 7932: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7933: 	                         1, -1);
 7934: 	SKIP_BLANKS
 7935:     } else if (CUR == '*') {
 7936:         NEXT
 7937: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_COUNT, ret, NULL, NULL,
 7938: 	                         0, -1);
 7939: 	SKIP_BLANKS
 7940:     } 
 7941:     return(ret);
 7942: }
 7943: 
 7944: 
 7945: static xmlExpNodePtr
 7946: xmlExpParseSeq(xmlExpCtxtPtr ctxt) {
 7947:     xmlExpNodePtr ret, right;
 7948: 
 7949:     ret = xmlExpParseOr(ctxt);
 7950:     SKIP_BLANKS
 7951:     while (CUR == '|') {
 7952:         NEXT
 7953: 	right = xmlExpParseOr(ctxt);
 7954: 	if (right == NULL) {
 7955: 	    xmlExpFree(ctxt, ret);
 7956: 	    return(NULL);
 7957: 	}
 7958: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_OR, ret, right, NULL, 0, 0);
 7959: 	if (ret == NULL)
 7960: 	    return(NULL);
 7961:     }
 7962:     return(ret);
 7963: }
 7964: 
 7965: static xmlExpNodePtr
 7966: xmlExpParseExpr(xmlExpCtxtPtr ctxt) {
 7967:     xmlExpNodePtr ret, right;
 7968: 
 7969:     ret = xmlExpParseSeq(ctxt);
 7970:     SKIP_BLANKS
 7971:     while (CUR == ',') {
 7972:         NEXT
 7973: 	right = xmlExpParseSeq(ctxt);
 7974: 	if (right == NULL) {
 7975: 	    xmlExpFree(ctxt, ret);
 7976: 	    return(NULL);
 7977: 	}
 7978: 	ret = xmlExpHashGetEntry(ctxt, XML_EXP_SEQ, ret, right, NULL, 0, 0);
 7979: 	if (ret == NULL)
 7980: 	    return(NULL);
 7981:     }
 7982:     return(ret);
 7983: }
 7984: 
 7985: /**
 7986:  * xmlExpParse:
 7987:  * @ctxt: the expressions context
 7988:  * @expr: the 0 terminated string
 7989:  *
 7990:  * Minimal parser for regexps, it understand the following constructs
 7991:  *  - string terminals
 7992:  *  - choice operator |
 7993:  *  - sequence operator ,
 7994:  *  - subexpressions (...)
 7995:  *  - usual cardinality operators + * and ?
 7996:  *  - finite sequences  { min, max }
 7997:  *  - infinite sequences { min, * }
 7998:  * There is minimal checkings made especially no checking on strings values
 7999:  *
 8000:  * Returns a new expression or NULL in case of failure
 8001:  */
 8002: xmlExpNodePtr
 8003: xmlExpParse(xmlExpCtxtPtr ctxt, const char *expr) {
 8004:     xmlExpNodePtr ret;
 8005: 
 8006:     ctxt->expr = expr;
 8007:     ctxt->cur = expr;
 8008: 
 8009:     ret = xmlExpParseExpr(ctxt);
 8010:     SKIP_BLANKS
 8011:     if (*ctxt->cur != 0) {
 8012:         xmlExpFree(ctxt, ret);
 8013:         return(NULL);
 8014:     }
 8015:     return(ret);
 8016: }
 8017: 
 8018: static void
 8019: xmlExpDumpInt(xmlBufferPtr buf, xmlExpNodePtr expr, int glob) {
 8020:     xmlExpNodePtr c;
 8021: 
 8022:     if (expr == NULL) return;
 8023:     if (glob) xmlBufferWriteChar(buf, "(");
 8024:     switch (expr->type) {
 8025:         case XML_EXP_EMPTY:
 8026: 	    xmlBufferWriteChar(buf, "empty");
 8027: 	    break;
 8028:         case XML_EXP_FORBID:
 8029: 	    xmlBufferWriteChar(buf, "forbidden");
 8030: 	    break;
 8031:         case XML_EXP_ATOM:
 8032: 	    xmlBufferWriteCHAR(buf, expr->exp_str);
 8033: 	    break;
 8034:         case XML_EXP_SEQ:
 8035: 	    c = expr->exp_left;
 8036: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8037: 	        xmlExpDumpInt(buf, c, 1);
 8038: 	    else
 8039: 	        xmlExpDumpInt(buf, c, 0);
 8040: 	    xmlBufferWriteChar(buf, " , ");
 8041: 	    c = expr->exp_right;
 8042: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8043: 	        xmlExpDumpInt(buf, c, 1);
 8044: 	    else
 8045: 	        xmlExpDumpInt(buf, c, 0);
 8046:             break;
 8047:         case XML_EXP_OR:
 8048: 	    c = expr->exp_left;
 8049: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8050: 	        xmlExpDumpInt(buf, c, 1);
 8051: 	    else
 8052: 	        xmlExpDumpInt(buf, c, 0);
 8053: 	    xmlBufferWriteChar(buf, " | ");
 8054: 	    c = expr->exp_right;
 8055: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8056: 	        xmlExpDumpInt(buf, c, 1);
 8057: 	    else
 8058: 	        xmlExpDumpInt(buf, c, 0);
 8059:             break;
 8060:         case XML_EXP_COUNT: {
 8061: 	    char rep[40];
 8062: 	    
 8063: 	    c = expr->exp_left;
 8064: 	    if ((c->type == XML_EXP_SEQ) || (c->type == XML_EXP_OR))
 8065: 	        xmlExpDumpInt(buf, c, 1);
 8066: 	    else
 8067: 	        xmlExpDumpInt(buf, c, 0);
 8068: 	    if ((expr->exp_min == 0) && (expr->exp_max == 1)) {
 8069: 		rep[0] = '?';
 8070: 		rep[1] = 0;
 8071: 	    } else if ((expr->exp_min == 0) && (expr->exp_max == -1)) {
 8072: 		rep[0] = '*';
 8073: 		rep[1] = 0;
 8074: 	    } else if ((expr->exp_min == 1) && (expr->exp_max == -1)) {
 8075: 		rep[0] = '+';
 8076: 		rep[1] = 0;
 8077: 	    } else if (expr->exp_max == expr->exp_min) {
 8078: 	        snprintf(rep, 39, "{%d}", expr->exp_min);
 8079: 	    } else if (expr->exp_max < 0) {
 8080: 	        snprintf(rep, 39, "{%d,inf}", expr->exp_min);
 8081: 	    } else {
 8082: 	        snprintf(rep, 39, "{%d,%d}", expr->exp_min, expr->exp_max);
 8083: 	    }
 8084: 	    rep[39] = 0;
 8085: 	    xmlBufferWriteChar(buf, rep);
 8086: 	    break;
 8087: 	}
 8088: 	default:
 8089: 	    fprintf(stderr, "Error in tree\n");
 8090:     }
 8091:     if (glob)
 8092:         xmlBufferWriteChar(buf, ")");
 8093: }
 8094: /**
 8095:  * xmlExpDump:
 8096:  * @buf:  a buffer to receive the output
 8097:  * @expr:  the compiled expression
 8098:  *
 8099:  * Serialize the expression as compiled to the buffer
 8100:  */
 8101: void
 8102: xmlExpDump(xmlBufferPtr buf, xmlExpNodePtr expr) {
 8103:     if ((buf == NULL) || (expr == NULL))
 8104:         return;
 8105:     xmlExpDumpInt(buf, expr, 0);
 8106: }
 8107: 
 8108: /**
 8109:  * xmlExpMaxToken:
 8110:  * @expr: a compiled expression
 8111:  *
 8112:  * Indicate the maximum number of input a expression can accept
 8113:  *
 8114:  * Returns the maximum length or -1 in case of error
 8115:  */
 8116: int
 8117: xmlExpMaxToken(xmlExpNodePtr expr) {
 8118:     if (expr == NULL)
 8119:         return(-1);
 8120:     return(expr->c_max);
 8121: }
 8122: 
 8123: /**
 8124:  * xmlExpCtxtNbNodes:
 8125:  * @ctxt: an expression context
 8126:  *
 8127:  * Debugging facility provides the number of allocated nodes at a that point
 8128:  *
 8129:  * Returns the number of nodes in use or -1 in case of error
 8130:  */
 8131: int
 8132: xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt) {
 8133:     if (ctxt == NULL)
 8134:         return(-1);
 8135:     return(ctxt->nb_nodes);
 8136: }
 8137: 
 8138: /**
 8139:  * xmlExpCtxtNbCons:
 8140:  * @ctxt: an expression context
 8141:  *
 8142:  * Debugging facility provides the number of allocated nodes over lifetime
 8143:  *
 8144:  * Returns the number of nodes ever allocated or -1 in case of error
 8145:  */
 8146: int
 8147: xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt) {
 8148:     if (ctxt == NULL)
 8149:         return(-1);
 8150:     return(ctxt->nb_cons);
 8151: }
 8152: 
 8153: #endif /* LIBXML_EXPR_ENABLED */
 8154: #define bottom_xmlregexp
 8155: #include "elfgcchack.h"
 8156: #endif /* LIBXML_REGEXP_ENABLED */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>