embedaddon/pcre/pcre_dfa_exec.c - diff

Return to pcre_dfa_exec.c CVS log

Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre

Diff for /embedaddon/pcre/pcre_dfa_exec.c between versions 1.1.1.2 and 1.1.1.4

version 1.1.1.2, 2012/02/21 23:50:25	version 1.1.1.4, 2013/07/22 08:25:55
Line 7 and semantics are as close as possible to those of the	Line 7 and semantics are as close as possible to those of the
below for why this module is different).	below for why this module is different).

Written by Philip Hazel	Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge	Copyright (c) 1997-2013 University of Cambridge

-----------------------------------------------------------------------------	-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without	Redistribution and use in source and binary forms, with or without
Line 38 POSSIBILITY OF SUCH DAMAGE.	Line 38 POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------	-----------------------------------------------------------------------------
*/	*/


/* This module contains the external function pcre_dfa_exec(), which is an	/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true	alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl- compatible, but it has advantages in certain	FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */	applications. */


Line 282 typedef struct stateblock {	Line 281 typedef struct stateblock {
int data; /* Some use extra data */	int data; /* Some use extra data */
} stateblock;	} stateblock;

#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))	#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))


#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
Line 303 Returns: nothing	Line 302 Returns: nothing
static void	static void
pchars(const pcre_uchar *p, int length, FILE *f)	pchars(const pcre_uchar *p, int length, FILE *f)
{	{
int c;	pcre_uint32 c;
while (length-- > 0)	while (length-- > 0)
{	{
if (isprint(c = *(p++)))	if (isprint(c = *(p++)))
fprintf(f, "%c", c);	fprintf(f, "%c", c);
else	else
fprintf(f, "\\x%02x", c);	fprintf(f, "\\x{%02x}", c);
}	}
}	}
#endif	#endif
Line 382 for the current character, one for the following chara	Line 381 for the current character, one for the following chara
next_new_state->count = (y); \	next_new_state->count = (y); \
next_new_state->data = (z); \	next_new_state->data = (z); \
next_new_state++; \	next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \	DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
	(x), (y), (z), __LINE__)); \
} \	} \
else return PCRE_ERROR_DFA_WSSIZE	else return PCRE_ERROR_DFA_WSSIZE

Line 424 BOOL utf = (md->poptions & PCRE_UTF8) != 0;	Line 424 BOOL utf = (md->poptions & PCRE_UTF8) != 0;
BOOL utf = FALSE;	BOOL utf = FALSE;
#endif	#endif

	BOOL reset_could_continue = FALSE;

rlevel++;	rlevel++;
offsetcount &= (-2);	offsetcount &= (-2);

Line 569 for (;;)	Line 571 for (;;)
{	{
int i, j;	int i, j;
int clen, dlen;	int clen, dlen;
unsigned int c, d;	pcre_uint32 c, d;
int forced_fail = 0;	int forced_fail = 0;
BOOL could_continue = FALSE;	BOOL partial_newline = FALSE;
	BOOL could_continue = reset_could_continue;
	reset_could_continue = FALSE;

/* Make the new state list into the active state list and empty the	/* Make the new state list into the active state list and empty the
new state list. */	new state list. */
Line 607 for (;;)	Line 611 for (;;)

if (ptr < end_subject)	if (ptr < end_subject)
{	{
clen = 1; /* Number of bytes in the character */	clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
if (utf) { GETCHARLEN(c, ptr, clen); } else	GETCHARLENTEST(c, ptr, clen);
#endif /* SUPPORT_UTF */	#else
c = *ptr;	c = *ptr;
	#endif /* SUPPORT_UTF */
}	}
else	else
{	{
Line 630 for (;;)	Line 635 for (;;)
BOOL caseless = FALSE;	BOOL caseless = FALSE;
const pcre_uchar *code;	const pcre_uchar *code;
int state_offset = current_state->offset;	int state_offset = current_state->offset;
int count, codevalue, rrc;	int codevalue, rrc;
	int count;

#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);	printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
Line 641 for (;;)	Line 647 for (;;)

/* A negative offset is a special case meaning "hold off going to this	/* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have	(negated) state until the number of characters in the data field have
been skipped". */	been skipped". If the could_continue flag was passed over from a previous
	state, arrange for it to be passed on. */

if (state_offset < 0)	if (state_offset < 0)
{	{
Line 650 for (;;)	Line 657 for (;;)
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));	DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count,	ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1);	current_state->data - 1);
	if (could_continue) reset_could_continue = TRUE;
continue;	continue;
}	}
else	else
Line 689 for (;;)	Line 697 for (;;)
permitted.	permitted.

We also use this mechanism for opcodes such as OP_TYPEPLUS that take an	We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
argument that is not a data character - but is always one byte long. We	argument that is not a data character - but is always one byte long because
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in	the values are small. We have to take special action to deal with \P, \p,
this case. To keep the other cases fast, convert these ones to new opcodes.	\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
*/	these ones to new opcodes. */

if (coptable[codevalue] > 0)	if (coptable[codevalue] > 0)
{	{
Line 783 for (;;)	Line 791 for (;;)
offsets[0] = (int)(current_subject - start_subject);	offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject);	offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,	DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
offsets[1] - offsets[0], current_subject));	offsets[1] - offsets[0], (char *)current_subject));
}	}
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)	if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{	{
Line 888 for (;;)	Line 896 for (;;)
/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
case OP_ANY:	case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))	if (clen > 0 && !IS_NEWLINE(ptr))
{ ADD_NEW(state_offset + 1, 0); }	{
	if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else
	{
	ADD_NEW(state_offset + 1, 0);
	}
	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
Line 916 for (;;)	Line 937 for (;;)
(ptr == end_subject - md->nllen)	(ptr == end_subject - md->nllen)
))	))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
	else if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	{
	reset_could_continue = TRUE;
	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
	}
	else could_continue = partial_newline = TRUE;
	}
}	}
break;	break;

Line 928 for (;;)	Line 962 for (;;)
else if (clen == 0 ||	else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))	((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
	else if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	{
	reset_could_continue = TRUE;
	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
	}
	else could_continue = partial_newline = TRUE;
	}
}	}
else if (IS_NEWLINE(ptr))	else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
Line 962 for (;;)	Line 1009 for (;;)
{	{
const pcre_uchar *temp = ptr - 1;	const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;	if (temp < md->start_used_ptr) md->start_used_ptr = temp;
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf) { BACKCHAR(temp); }	if (utf) { BACKCHAR(temp); }
#endif	#endif
GETCHARTEST(d, temp);	GETCHARTEST(d, temp);
Line 1015 for (;;)	Line 1062 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[1])	switch(code[1])
{	{
Line 1063 for (;;)	Line 1111 for (;;)
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[2];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1090 for (;;)	Line 1153 for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1113 for (;;)	Line 1184 for (;;)
ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1135 for (;;)	Line 1214 for (;;)
ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1155 for (;;)	Line 1242 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{	{
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }	{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
Line 1176 for (;;)	Line 1271 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1186 for (;;)	Line 1289 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }	{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
Line 1209 for (;;)	Line 1312 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[2])	switch(code[2])
{	{
Line 1257 for (;;)	Line 1361 for (;;)
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[3];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1283 for (;;)	Line 1402 for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:	case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */	count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
	int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)	if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
Line 1292 for (;;)	Line 1412 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
count++;	count++;
ADD_NEW_DATA(-state_offset, count, ncount);	ADD_NEW_DATA(-state_offset, count, ncount);
Line 1318 for (;;)	Line 1440 for (;;)
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL01;	goto ANYNL01;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL01:	ANYNL01:
case 0x000a:	case CHAR_LF:
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)	if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
Line 1358 for (;;)	Line 1482 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1397 for (;;)	Line 1515 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1456 for (;;)	Line 1556 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[2])	switch(code[2])
{	{
Line 1504 for (;;)	Line 1605 for (;;)
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[3];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1539 for (;;)	Line 1655 for (;;)
QS2:	QS2:

ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
	int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
Line 1549 for (;;)	Line 1666 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);	ADD_NEW_DATA(-(state_offset + count), 0, ncount);
}	}
Line 1582 for (;;)	Line 1701 for (;;)
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL02;	goto ANYNL02;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL02:	ANYNL02:
case 0x000a:	case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)	codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);	ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;	break;

default:	default:
Line 1630 for (;;)	Line 1751 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1652 for (;;)	Line 1767 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, 0);	ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}	}
}	}
break;	break;
Line 1676 for (;;)	Line 1791 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1711 for (;;)	Line 1808 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, 0);	ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}	}
}	}
break;	break;
Line 1728 for (;;)	Line 1825 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[1 + IMM2_SIZE + 1])	switch(code[1 + IMM2_SIZE + 1])
{	{
Line 1776 for (;;)	Line 1874 for (;;)
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1790 for (;;)	Line 1903 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }	{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
Line 1806 for (;;)	Line 1919 for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }	{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
	int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
Line 1815 for (;;)	Line 1929 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
if (++count >= GET2(code, 1))	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
	reset_could_continue = TRUE;
	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else	else
{ ADD_NEW_DATA(-state_offset, count, ncount); }	{ ADD_NEW_DATA(-state_offset, count, ncount); }
Line 1845 for (;;)	Line 1963 for (;;)
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL03;	goto ANYNL03;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL03:	ANYNL03:
case 0x000a:	case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else	else
{ ADD_NEW_DATA(-state_offset, count, ncount); }	{ ADD_NEW_DATA(-state_offset, count, ncount); }
Line 1889 for (;;)	Line 2009 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1910 for (;;)	Line 2024 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else	else
{ ADD_NEW_DATA(-state_offset, count, 0); }	{ ADD_NEW_DATA(-state_offset, count, 0); }
Line 1931 for (;;)	Line 2045 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1965 for (;;)	Line 2061 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else	else
{ ADD_NEW_DATA(-state_offset, count, 0); }	{ ADD_NEW_DATA(-state_offset, count, 0); }
Line 2025 for (;;)	Line 2121 for (;;)
to wait for them to pass before continuing. */	to wait for them to pass before continuing. */

case OP_EXTUNI:	case OP_EXTUNI:
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
	int lgb, rgb;
const pcre_uchar *nptr = ptr + clen;	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nclen = 1;	dlen = 1;
GETCHARLEN(c, nptr, nclen);	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
if (UCD_CATEGORY(c) != ucp_M) break;	rgb = UCD_GRAPHBREAK(d);
	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += nclen;	lgb = rgb;
	nptr += dlen;
}	}
	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
	reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);	ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
}	}
break;	break;
Line 2050 for (;;)	Line 2152 for (;;)
case OP_ANYNL:	case OP_ANYNL:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;

case 0x000a:	case CHAR_LF:
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a)	if (ptr + 1 >= end_subject)
{	{
	ADD_NEW(state_offset + 1, 0);
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	reset_could_continue = TRUE;
	}
	else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
	{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}	}
else	else
Line 2078 for (;;)	Line 2188 for (;;)
case OP_NOT_VSPACE:	case OP_NOT_VSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
break;	break;

default:	default:
Line 2097 for (;;)	Line 2201 for (;;)
case OP_VSPACE:	case OP_VSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

default: break;	default:
	break;
}	}
break;	break;

Line 2115 for (;;)	Line 2214 for (;;)
case OP_NOT_HSPACE:	case OP_NOT_HSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
break;	break;

default:	default:
Line 2146 for (;;)	Line 2227 for (;;)
case OP_HSPACE:	case OP_HSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

	default:
	break;
}	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
/* Match a negated single character casefully. This is only used for	/* Match a negated single character casefully. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */

case OP_NOT:	case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }	if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
/* Match a negated single character caselessly. This is only used for	/* Match a negated single character caselessly. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */

case OP_NOTI:	case OP_NOTI:
if (clen > 0 && c != d && c != fcc[d])	if (clen > 0)
{ ADD_NEW(state_offset + dlen + 1, 0); }	{
	unsigned int otherd;
	#ifdef SUPPORT_UTF
	if (utf && d >= 128)
	{
	#ifdef SUPPORT_UCP
	otherd = UCD_OTHERCASE(d);
	#endif /* SUPPORT_UCP */
	}
	else
	#endif /* SUPPORT_UTF */
	otherd = TABLE_GET(d, fcc, d);
	if (c != d && c != otherd)
	{ ADD_NEW(state_offset + dlen + 1, 0); }
	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
Line 2210 for (;;)	Line 2286 for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
Line 2257 for (;;)	Line 2333 for (;;)
ADD_ACTIVE(state_offset + dlen + 1, 0);	ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
Line 2302 for (;;)	Line 2378 for (;;)
ADD_ACTIVE(state_offset + dlen + 1, 0);	ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
Line 2339 for (;;)	Line 2415 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
Line 2355 for (;;)	Line 2431 for (;;)
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }	{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
Line 2383 for (;;)	Line 2459 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
Line 2404 for (;;)	Line 2480 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }	{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
Line 2477 for (;;)	Line 2553 for (;;)
case OP_CRRANGE:	case OP_CRRANGE:
case OP_CRMINRANGE:	case OP_CRMINRANGE:
count = current_state->count; /* Already matched */	count = current_state->count; /* Already matched */
if (count >= GET2(ecode, 1))	if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }	{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)	if (isinclass)
{	{
int max = GET2(ecode, 1 + IMM2_SIZE);	int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (++count >= max && max != 0) /* Max 0 => no limit */	if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }	{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else	else
Line 2557 for (;;)	Line 2633 for (;;)
cb.version = 1; /* Version 1 of the callout block */	cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[LINK_SIZE+2];	cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets;	cb.offset_vector = offsets;
#ifdef COMPILE_PCRE8	#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;	cb.subject = (PCRE_SPTR)start_subject;
#else	#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;	cb.subject = (PCRE_SPTR16)start_subject;
	#elif defined COMPILE_PCRE32
	cb.subject = (PCRE_SPTR32)start_subject;
#endif	#endif
cb.subject_length = (int)(end_subject - start_subject);	cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);	cb.start_match = (int)(current_subject - start_subject);
Line 2691 for (;;)	Line 2769 for (;;)
for (rc = rc*2 - 2; rc >= 0; rc -= 2)	for (rc = rc*2 - 2; rc >= 0; rc -= 2)
{	{
int charcount = local_offsets[rc+1] - local_offsets[rc];	int charcount = local_offsets[rc+1] - local_offsets[rc];
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
const pcre_uchar *p = start_subject + local_offsets[rc];	if (utf)
const pcre_uchar *pp = start_subject + local_offsets[rc+1];	{
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;	const pcre_uchar *p = start_subject + local_offsets[rc];
	const pcre_uchar *pp = start_subject + local_offsets[rc+1];
	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
	}
#endif	#endif
if (charcount > 0)	if (charcount > 0)
{	{
Line 2792 for (;;)	Line 2873 for (;;)
const pcre_uchar *p = ptr;	const pcre_uchar *p = ptr;
const pcre_uchar *pp = local_ptr;	const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);	charcount = (int)(pp - p);
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;	if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
#endif	#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}	}
Line 2874 for (;;)	Line 2955 for (;;)
}	}
else	else
{	{
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
const pcre_uchar *p = start_subject + local_offsets[0];	if (utf)
const pcre_uchar *pp = start_subject + local_offsets[1];	{
while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;	const pcre_uchar *p = start_subject + local_offsets[0];
	const pcre_uchar *pp = start_subject + local_offsets[1];
	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
	}
#endif	#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)	if (repeat_state_offset >= 0)
Line 2900 for (;;)	Line 2984 for (;;)
cb.version = 1; /* Version 1 of the callout block */	cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[1];	cb.callout_number = code[1];
cb.offset_vector = offsets;	cb.offset_vector = offsets;
#ifdef COMPILE_PCRE8	#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;	cb.subject = (PCRE_SPTR)start_subject;
#else	#elif defined COMPILE_PCRE16
cb.subject = (PCRE_SPTR16)start_subject;	cb.subject = (PCRE_SPTR16)start_subject;
	#elif defined COMPILE_PCRE32
	cb.subject = (PCRE_SPTR32)start_subject;
#endif	#endif
cb.subject_length = (int)(end_subject - start_subject);	cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);	cb.start_match = (int)(current_subject - start_subject);
Line 2946 for (;;)	Line 3032 for (;;)
if (new_count <= 0)	if (new_count <= 0)
{	{
if (rlevel == 1 && /* Top level, and */	if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on */	could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */	forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */	( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */	(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
Line 2954 for (;;)	Line 3040 for (;;)
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */	((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */	match_count < 0) /* no matches */
) && /* And... */	) && /* And... */
ptr >= end_subject && /* Reached end of subject */	(
ptr > md->start_used_ptr) /* Inspected non-empty string */	partial_newline || /* Either partial NL */
{	( /* or ... */
if (offsetcount >= 2)	ptr >= end_subject && /* End of subject and */
{	ptr > md->start_used_ptr) /* Inspected non-empty string */
offsets[0] = (int)(md->start_used_ptr - start_subject);	)
offsets[1] = (int)(end_subject - start_subject);	)
}
match_count = PCRE_ERROR_PARTIAL;	match_count = PCRE_ERROR_PARTIAL;
}

DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,	"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));	rlevel*2-2, SP));
Line 3014 Returns: > 0 => number of match offset pairs	Line 3097 Returns: > 0 => number of match offset pairs
< -1 => some kind of unexpected problem	< -1 => some kind of unexpected problem
*/	*/

#ifdef COMPILE_PCRE8	#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,	pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,	const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)	int offsetcount, int *workspace, int wscount)
#else	#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,	pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,	PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)	int offsetcount, int *workspace, int wscount)
	#elif defined COMPILE_PCRE32
	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
	pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
	PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
	int offsetcount, int *workspace, int wscount)
#endif	#endif
{	{
REAL_PCRE *re = (REAL_PCRE *)argument_re;	REAL_PCRE *re = (REAL_PCRE *)argument_re;
Line 3050 if (re == NULL || subject == NULL || workspace == NULL	Line 3138 if (re == NULL || subject == NULL || workspace == NULL
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;	(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;	if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;	if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
	if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;	if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;

/* We need to find the pointer to any study data before we test for byte	/* Check that the first field in the block is the magic number. If it is not,
flipping, so we scan the extra_data block first. This may set two fields in the	return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
match block, so we must initialize them beforehand. However, the other fields	REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
in the match block must not be set until after the byte flipping. */	means that the pattern is likely compiled with different endianness. */

	if (re->magic_number != MAGIC_NUMBER)
	return re->magic_number == REVERSED_MAGIC_NUMBER?
	PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
	if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

	/* If restarting after a partial match, do some sanity checks on the contents
	of the workspace. */

	if ((options & PCRE_DFA_RESTART) != 0)
	{
	if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
	workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
	return PCRE_ERROR_DFA_BADRESTART;
	}

	/* Set up study, callout, and table data */

md->tables = re->tables;	md->tables = re->tables;
md->callout_data = NULL;	md->callout_data = NULL;

Line 3074 if (extra_data != NULL)	Line 3180 if (extra_data != NULL)
md->tables = extra_data->tables;	md->tables = extra_data->tables;
}	}

/* Check that the first field in the block is the magic number. If it is not,
return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
means that the pattern is likely compiled with different endianness. */

if (re->magic_number != MAGIC_NUMBER)
return re->magic_number == REVERSED_MAGIC_NUMBER?
PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

/* Set some local values */	/* Set some local values */

current_subject = (const pcre_uchar *)subject + start_offset;	current_subject = (const pcre_uchar *)subject + start_offset;
Line 3091 end_subject = (const pcre_uchar *)subject + length;	Line 3187 end_subject = (const pcre_uchar *)subject + length;
req_char_ptr = current_subject - 1;	req_char_ptr = current_subject - 1;

#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
/* PCRE_UTF16 has the same value as PCRE_UTF8. */	/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
utf = (re->options & PCRE_UTF8) != 0;	utf = (re->options & PCRE_UTF8) != 0;
#else	#else
utf = FALSE;	utf = FALSE;
Line 3177 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)	Line 3273 if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
offsets[0] = erroroffset;	offsets[0] = erroroffset;
offsets[1] = errorcode;	offsets[1] = errorcode;
}	}
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?	#if defined COMPILE_PCRE8
	return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;	PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
	#elif defined COMPILE_PCRE16
	return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
	PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
	#elif defined COMPILE_PCRE32
	return PCRE_ERROR_BADUTF32;
	#endif
}	}
	#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&	if (start_offset > 0 && start_offset < length &&
NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))	NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;	return PCRE_ERROR_BADUTF8_OFFSET;
	#endif
}	}
#endif	#endif

Line 3292 for (;;)	Line 3397 for (;;)
if (has_first_char)	if (has_first_char)
{	{
if (first_char != first_char2)	if (first_char != first_char2)
	{
	pcre_uchar csc;
while (current_subject < end_subject &&	while (current_subject < end_subject &&
*current_subject != first_char && *current_subject != first_char2)	(csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
current_subject++;	current_subject++;
	}
else	else
while (current_subject < end_subject &&	while (current_subject < end_subject &&
*current_subject != first_char)	RAWUCHARTEST(current_subject) != first_char)
current_subject++;	current_subject++;
}	}

Line 3327 for (;;)	Line 3435 for (;;)
ANYCRLF, and we are now at a LF, advance the match position by one	ANYCRLF, and we are now at a LF, advance the match position by one
more character. */	more character. */

if (current_subject[-1] == CHAR_CR &&	if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&	(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&	current_subject < end_subject &&
*current_subject == CHAR_NL)	RAWUCHARTEST(current_subject) == CHAR_NL)
current_subject++;	current_subject++;
}	}
}	}
Line 3341 for (;;)	Line 3449 for (;;)
{	{
while (current_subject < end_subject)	while (current_subject < end_subject)
{	{
register unsigned int c = *current_subject;	register pcre_uint32 c = RAWUCHARTEST(current_subject);
#ifndef COMPILE_PCRE8	#ifndef COMPILE_PCRE8
if (c > 255) c = 255;	if (c > 255) c = 255;
#endif	#endif
Line 3407 for (;;)	Line 3515 for (;;)
{	{
while (p < end_subject)	while (p < end_subject)
{	{
register int pp = *p++;	register pcre_uint32 pp = RAWUCHARINCTEST(p);
if (pp == req_char || pp == req_char2) { p--; break; }	if (pp == req_char || pp == req_char2) { p--; break; }
}	}
}	}
Line 3415 for (;;)	Line 3523 for (;;)
{	{
while (p < end_subject)	while (p < end_subject)
{	{
if (*p++ == req_char) { p--; break; }	if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
}	}
}	}

Line 3453 for (;;)	Line 3561 for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry	/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */	on only if not anchored. */

if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;	if (rc != PCRE_ERROR_NOMATCH || anchored)
	{
	if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
	{
	offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
	offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
	if (offsetcount > 2)
	offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
	}
	return rc;
	}

/* Advance to the next subject character unless we are at the end of a line	/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */	and firstline is set. */
Line 3473 for (;;)	Line 3591 for (;;)
not contain any explicit matches for \r or \n, and the newline option is CRLF	not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */	or ANY or ANYCRLF, advance the match position by one more character. */

if (current_subject[-1] == CHAR_CR &&	if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&	current_subject < end_subject &&
*current_subject == CHAR_NL &&	RAWUCHARTEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&	(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY ||	(md->nltype == NLTYPE_ANY ||
md->nltype == NLTYPE_ANYCRLF ||	md->nltype == NLTYPE_ANYCRLF ||

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1.1.2
changed lines
	Added in v.1.1.1.4