embedaddon/pcre/pcre_dfa_exec.c - diff

Return to pcre_dfa_exec.c CVS log

Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre

Diff for /embedaddon/pcre/pcre_dfa_exec.c between versions 1.1.1.1 and 1.1.1.4

version 1.1.1.1, 2012/02/21 23:05:51	version 1.1.1.4, 2013/07/22 08:25:55
Line 7 and semantics are as close as possible to those of the	Line 7 and semantics are as close as possible to those of the
below for why this module is different).	below for why this module is different).

Written by Philip Hazel	Written by Philip Hazel
Copyright (c) 1997-2011 University of Cambridge	Copyright (c) 1997-2013 University of Cambridge

-----------------------------------------------------------------------------	-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without	Redistribution and use in source and binary forms, with or without
Line 38 POSSIBILITY OF SUCH DAMAGE.	Line 38 POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------	-----------------------------------------------------------------------------
*/	*/


/* This module contains the external function pcre_dfa_exec(), which is an	/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true	alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl- compatible, but it has advantages in certain	FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */	applications. */


Line 113 small value. Non-zero values in the table are the offs	Line 112 small value. Non-zero values in the table are the offs
the character is to be found. *NOTE* If the start of this table is	the character is to be found. *NOTE* If the start of this table is
modified, the three tables that follow must also be modified. */	modified, the three tables that follow must also be modified. */

static const uschar coptable[] = {	static const pcre_uint8 coptable[] = {
0, /* End */	0, /* End */
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */	0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */	0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
Line 128 static const uschar coptable[] = {	Line 127 static const uschar coptable[] = {
1, /* noti */	1, /* noti */
/* Positive single-char repeats */	/* Positive single-char repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */	1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
3, 3, 3, /* upto, minupto, exact */	1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
1, 1, 1, 3, /* *+, ++, ?+, upto+ */	1+IMM2_SIZE, /* exact */
	1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */	1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
3, 3, 3, /* upto I, minupto I, exact I */	1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */	1+IMM2_SIZE, /* exact I */
	1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
/* Negative single-char repeats - only for chars < 256 */	/* Negative single-char repeats - only for chars < 256 */
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */	1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
3, 3, 3, /* NOT upto, minupto, exact */	1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */	1+IMM2_SIZE, /* NOT exact */
	1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */	1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
3, 3, 3, /* NOT upto I, minupto I, exact I */	1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */	1+IMM2_SIZE, /* NOT exact I */
	1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
/* Positive type repeats */	/* Positive type repeats */
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */	1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
3, 3, 3, /* Type upto, minupto, exact */	1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */	1+IMM2_SIZE, /* Type exact */
	1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
/* Character class & ref repeats */	/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */	0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */	0, 0, /* CRRANGE, CRMINRANGE */
Line 182 remember the fact that a character could have been ins	Line 186 remember the fact that a character could have been ins
the subject is reached. *NOTE* If the start of this table is modified, the	the subject is reached. *NOTE* If the start of this table is modified, the
two tables that follow must also be modified. */	two tables that follow must also be modified. */

static const uschar poptable[] = {	static const pcre_uint8 poptable[] = {
0, /* End */	0, /* End */
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */	0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */	1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
Line 249 static const uschar poptable[] = {	Line 253 static const uschar poptable[] = {
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,	/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
and \w */	and \w */

static const uschar toptable1[] = {	static const pcre_uint8 toptable1[] = {
0, 0, 0, 0, 0, 0,	0, 0, 0, 0, 0, 0,
ctype_digit, ctype_digit,	ctype_digit, ctype_digit,
ctype_space, ctype_space,	ctype_space, ctype_space,
Line 257 static const uschar toptable1[] = {	Line 261 static const uschar toptable1[] = {
0, 0 /* OP_ANY, OP_ALLANY */	0, 0 /* OP_ANY, OP_ALLANY */
};	};

static const uschar toptable2[] = {	static const pcre_uint8 toptable2[] = {
0, 0, 0, 0, 0, 0,	0, 0, 0, 0, 0, 0,
ctype_digit, 0,	ctype_digit, 0,
ctype_space, 0,	ctype_space, 0,
Line 277 typedef struct stateblock {	Line 281 typedef struct stateblock {
int data; /* Some use extra data */	int data; /* Some use extra data */
} stateblock;	} stateblock;

#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))	#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))


#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
Line 296 Returns: nothing	Line 300 Returns: nothing
*/	*/

static void	static void
pchars(unsigned char *p, int length, FILE *f)	pchars(const pcre_uchar *p, int length, FILE *f)
{	{
int c;	pcre_uint32 c;
while (length-- > 0)	while (length-- > 0)
{	{
if (isprint(c = *(p++)))	if (isprint(c = *(p++)))
fprintf(f, "%c", c);	fprintf(f, "%c", c);
else	else
fprintf(f, "\\x%02x", c);	fprintf(f, "\\x{%02x}", c);
}	}
}	}
#endif	#endif
Line 377 for the current character, one for the following chara	Line 381 for the current character, one for the following chara
next_new_state->count = (y); \	next_new_state->count = (y); \
next_new_state->data = (z); \	next_new_state->data = (z); \
next_new_state++; \	next_new_state++; \
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \	DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
	(x), (y), (z), __LINE__)); \
} \	} \
else return PCRE_ERROR_DFA_WSSIZE	else return PCRE_ERROR_DFA_WSSIZE

Line 386 for the current character, one for the following chara	Line 391 for the current character, one for the following chara
static int	static int
internal_dfa_exec(	internal_dfa_exec(
dfa_match_data *md,	dfa_match_data *md,
const uschar *this_start_code,	const pcre_uchar *this_start_code,
const uschar *current_subject,	const pcre_uchar *current_subject,
int start_offset,	int start_offset,
int *offsets,	int *offsets,
int offsetcount,	int offsetcount,
Line 398 internal_dfa_exec(	Line 403 internal_dfa_exec(
stateblock *active_states, *new_states, *temp_states;	stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;	stateblock *next_active_state, *next_new_state;

const uschar *ctypes, *lcc, *fcc;	const pcre_uint8 *ctypes, *lcc, *fcc;
const uschar *ptr;	const pcre_uchar *ptr;
const uschar *end_code, *first_op;	const pcre_uchar *end_code, *first_op;

dfa_recursion_info new_recursive;	dfa_recursion_info new_recursive;

Line 409 int active_count, new_count, match_count;	Line 414 int active_count, new_count, match_count;
/* Some fields in the md block are frequently referenced, so we load them into	/* Some fields in the md block are frequently referenced, so we load them into
independent variables in the hope that this will perform better. */	independent variables in the hope that this will perform better. */

const uschar *start_subject = md->start_subject;	const pcre_uchar *start_subject = md->start_subject;
const uschar *end_subject = md->end_subject;	const pcre_uchar *end_subject = md->end_subject;
const uschar *start_code = md->start_code;	const pcre_uchar *start_code = md->start_code;

#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;	BOOL utf = (md->poptions & PCRE_UTF8) != 0;
#else	#else
BOOL utf8 = FALSE;	BOOL utf = FALSE;
#endif	#endif

	BOOL reset_could_continue = FALSE;

rlevel++;	rlevel++;
offsetcount &= (-2);	offsetcount &= (-2);

Line 442 new_count = 0;	Line 449 new_count = 0;

first_op = this_start_code + 1 + LINK_SIZE +	first_op = this_start_code + 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||	((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0);	*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
	? IMM2_SIZE:0);

/* The first thing in any (sub) pattern is a bracket of some sort. Push all	/* The first thing in any (sub) pattern is a bracket of some sort. Push all
the alternative states onto the list, and find out where the end is. This	the alternative states onto the list, and find out where the end is. This
Line 470 if (*first_op == OP_REVERSE)	Line 478 if (*first_op == OP_REVERSE)
/* If we can't go back the amount required for the longest lookbehind	/* If we can't go back the amount required for the longest lookbehind
pattern, go back as far as we can; some alternatives may still be viable. */	pattern, go back as far as we can; some alternatives may still be viable. */

#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
/* In character mode we have to step back character by character */	/* In character mode we have to step back character by character */

if (utf8)	if (utf)
{	{
for (gone_back = 0; gone_back < max_back; gone_back++)	for (gone_back = 0; gone_back < max_back; gone_back++)
{	{
if (current_subject <= start_subject) break;	if (current_subject <= start_subject) break;
current_subject--;	current_subject--;
while (current_subject > start_subject &&	ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
(*current_subject & 0xc0) == 0x80)
current_subject--;
}	}
}	}
else	else
Line 542 else	Line 548 else
{	{
int length = 1 + LINK_SIZE +	int length = 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||	((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?	*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
2:0);	? IMM2_SIZE:0);
do	do
{	{
ADD_NEW((int)(end_code - start_code + length), 0);	ADD_NEW((int)(end_code - start_code + length), 0);
Line 556 else	Line 562 else

workspace[0] = 0; /* Bit indicating which vector is current */	workspace[0] = 0; /* Bit indicating which vector is current */

DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));	DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));

/* Loop for scanning the subject */	/* Loop for scanning the subject */

Line 565 for (;;)	Line 571 for (;;)
{	{
int i, j;	int i, j;
int clen, dlen;	int clen, dlen;
unsigned int c, d;	pcre_uint32 c, d;
int forced_fail = 0;	int forced_fail = 0;
BOOL could_continue = FALSE;	BOOL partial_newline = FALSE;
	BOOL could_continue = reset_could_continue;
	reset_could_continue = FALSE;

/* Make the new state list into the active state list and empty the	/* Make the new state list into the active state list and empty the
new state list. */	new state list. */
Line 583 for (;;)	Line 591 for (;;)

#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);	printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
pchars((uschar *)ptr, strlen((char *)ptr), stdout);	pchars(ptr, STRLEN_UC(ptr), stdout);
printf("\"\n");	printf("\"\n");

printf("%.*sActive states: ", rlevel*2-2, SP);	printf("%.*sActive states: ", rlevel*2-2, SP);
Line 603 for (;;)	Line 611 for (;;)

if (ptr < end_subject)	if (ptr < end_subject)
{	{
clen = 1; /* Number of bytes in the character */	clen = 1; /* Number of data items in the character */
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8) { GETCHARLEN(c, ptr, clen); } else	GETCHARLENTEST(c, ptr, clen);
#endif /* SUPPORT_UTF8 */	#else
c = *ptr;	c = *ptr;
	#endif /* SUPPORT_UTF */
}	}
else	else
{	{
Line 624 for (;;)	Line 633 for (;;)
{	{
stateblock *current_state = active_states + i;	stateblock *current_state = active_states + i;
BOOL caseless = FALSE;	BOOL caseless = FALSE;
const uschar *code;	const pcre_uchar *code;
int state_offset = current_state->offset;	int state_offset = current_state->offset;
int count, codevalue, rrc;	int codevalue, rrc;
	int count;

#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);	printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
Line 637 for (;;)	Line 647 for (;;)

/* A negative offset is a special case meaning "hold off going to this	/* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have	(negated) state until the number of characters in the data field have
been skipped". */	been skipped". If the could_continue flag was passed over from a previous
	state, arrange for it to be passed on. */

if (state_offset < 0)	if (state_offset < 0)
{	{
Line 646 for (;;)	Line 657 for (;;)
DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));	DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
ADD_NEW_DATA(state_offset, current_state->count,	ADD_NEW_DATA(state_offset, current_state->count,
current_state->data - 1);	current_state->data - 1);
	if (could_continue) reset_could_continue = TRUE;
continue;	continue;
}	}
else	else
Line 685 for (;;)	Line 697 for (;;)
permitted.	permitted.

We also use this mechanism for opcodes such as OP_TYPEPLUS that take an	We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
argument that is not a data character - but is always one byte long. We	argument that is not a data character - but is always one byte long because
have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in	the values are small. We have to take special action to deal with \P, \p,
this case. To keep the other cases fast, convert these ones to new opcodes.	\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
*/	these ones to new opcodes. */

if (coptable[codevalue] > 0)	if (coptable[codevalue] > 0)
{	{
dlen = 1;	dlen = 1;
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else	if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
d = code[coptable[codevalue]];	d = code[coptable[codevalue]];
if (codevalue >= OP_TYPESTAR)	if (codevalue >= OP_TYPESTAR)
{	{
Line 779 for (;;)	Line 791 for (;;)
offsets[0] = (int)(current_subject - start_subject);	offsets[0] = (int)(current_subject - start_subject);
offsets[1] = (int)(ptr - start_subject);	offsets[1] = (int)(ptr - start_subject);
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,	DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
offsets[1] - offsets[0], current_subject));	offsets[1] - offsets[0], (char *)current_subject));
}	}
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)	if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
{	{
Line 816 for (;;)	Line 828 for (;;)
/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
case OP_CBRA:	case OP_CBRA:
case OP_SCBRA:	case OP_SCBRA:
ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0);	ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
code += GET(code, 1);	code += GET(code, 1);
while (*code == OP_ALT)	while (*code == OP_ALT)
{	{
Line 884 for (;;)	Line 896 for (;;)
/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
case OP_ANY:	case OP_ANY:
if (clen > 0 && !IS_NEWLINE(ptr))	if (clen > 0 && !IS_NEWLINE(ptr))
{ ADD_NEW(state_offset + 1, 0); }	{
	if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else
	{
	ADD_NEW(state_offset + 1, 0);
	}
	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
Line 912 for (;;)	Line 937 for (;;)
(ptr == end_subject - md->nllen)	(ptr == end_subject - md->nllen)
))	))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
	else if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	{
	reset_could_continue = TRUE;
	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
	}
	else could_continue = partial_newline = TRUE;
	}
}	}
break;	break;

Line 924 for (;;)	Line 962 for (;;)
else if (clen == 0 ||	else if (clen == 0 ||
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))	((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
	else if (ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	{
	reset_could_continue = TRUE;
	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
	}
	else could_continue = partial_newline = TRUE;
	}
}	}
else if (IS_NEWLINE(ptr))	else if (IS_NEWLINE(ptr))
{ ADD_ACTIVE(state_offset + 1, 0); }	{ ADD_ACTIVE(state_offset + 1, 0); }
Line 956 for (;;)	Line 1007 for (;;)

if (ptr > start_subject)	if (ptr > start_subject)
{	{
const uschar *temp = ptr - 1;	const pcre_uchar *temp = ptr - 1;
if (temp < md->start_used_ptr) md->start_used_ptr = temp;	if (temp < md->start_used_ptr) md->start_used_ptr = temp;
#ifdef SUPPORT_UTF8	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
if (utf8) BACKCHAR(temp);	if (utf) { BACKCHAR(temp); }
#endif	#endif
GETCHARTEST(d, temp);	GETCHARTEST(d, temp);
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
Line 1011 for (;;)	Line 1062 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[1])	switch(code[1])
{	{
Line 1024 for (;;)	Line 1076 for (;;)
break;	break;

case PT_GC:	case PT_GC:
OK = _pcre_ucp_gentype[prop->chartype] == code[2];	OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
break;	break;

case PT_PC:	case PT_PC:
Line 1038 for (;;)	Line 1090 for (;;)
/* These are specials for combination cases. */	/* These are specials for combination cases. */

case PT_ALNUM:	case PT_ALNUM:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N;	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;	break;

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;	c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||	c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;	c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_WORD:	case PT_WORD:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N ||	PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[2];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1086 for (;;)	Line 1153 for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1109 for (;;)	Line 1184 for (;;)
ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1131 for (;;)	Line 1214 for (;;)
ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1151 for (;;)	Line 1242 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
{	{
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 4, 0); }	{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 1168 for (;;)	Line 1267 for (;;)
case OP_TYPEUPTO:	case OP_TYPEUPTO:
case OP_TYPEMINUPTO:	case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:	case OP_TYPEPOSUPTO:
ADD_ACTIVE(state_offset + 4, 0);	ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||	if (d == OP_ANY && ptr + 1 >= md->end_subject &&
	(md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
	NLBLOCK->nltype == NLTYPE_FIXED &&
	NLBLOCK->nllen == 2 &&
	c == NLBLOCK->nl[0])
	{
	could_continue = partial_newline = TRUE;
	}
	else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
(c < 256 &&	(c < 256 &&
(d != OP_ANY || !IS_NEWLINE(ptr)) &&	(d != OP_ANY || !IS_NEWLINE(ptr)) &&
((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
Line 1182 for (;;)	Line 1289 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 4, 0); }	{ ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 1205 for (;;)	Line 1312 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[2])	switch(code[2])
{	{
Line 1218 for (;;)	Line 1326 for (;;)
break;	break;

case PT_GC:	case PT_GC:
OK = _pcre_ucp_gentype[prop->chartype] == code[3];	OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
break;	break;

case PT_PC:	case PT_PC:
Line 1232 for (;;)	Line 1340 for (;;)
/* These are specials for combination cases. */	/* These are specials for combination cases. */

case PT_ALNUM:	case PT_ALNUM:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N;	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;	break;

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;	c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||	c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;	c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_WORD:	case PT_WORD:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N ||	PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[3];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1279 for (;;)	Line 1402 for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:	case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */	count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
const uschar *nptr = ptr + clen;	int lgb, rgb;
	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)	if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
count++;	count++;
ADD_NEW_DATA(-state_offset, count, ncount);	ADD_NEW_DATA(-state_offset, count, ncount);
Line 1314 for (;;)	Line 1440 for (;;)
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL01;	goto ANYNL01;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL01:	ANYNL01:
case 0x000a:	case CHAR_LF:
if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)	if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
Line 1354 for (;;)	Line 1482 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1393 for (;;)	Line 1515 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1452 for (;;)	Line 1556 for (;;)
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[2])	switch(code[2])
{	{
Line 1465 for (;;)	Line 1570 for (;;)
break;	break;

case PT_GC:	case PT_GC:
OK = _pcre_ucp_gentype[prop->chartype] == code[3];	OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
break;	break;

case PT_PC:	case PT_PC:
Line 1479 for (;;)	Line 1584 for (;;)
/* These are specials for combination cases. */	/* These are specials for combination cases. */

case PT_ALNUM:	case PT_ALNUM:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N;	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;	break;

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;	c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||	c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;	c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_WORD:	case PT_WORD:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N ||	PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[3];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1535 for (;;)	Line 1655 for (;;)
QS2:	QS2:

ADD_ACTIVE(state_offset + 2, 0);	ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
const uschar *nptr = ptr + clen;	int lgb, rgb;
	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)	codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
Line 1545 for (;;)	Line 1666 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);	ADD_NEW_DATA(-(state_offset + count), 0, ncount);
}	}
Line 1578 for (;;)	Line 1701 for (;;)
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL02;	goto ANYNL02;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL02:	ANYNL02:
case 0x000a:	case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)	codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, ncount);	ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
break;	break;

default:	default:
Line 1626 for (;;)	Line 1751 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1648 for (;;)	Line 1767 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, 0);	ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}	}
}	}
break;	break;
Line 1672 for (;;)	Line 1791 for (;;)
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1707 for (;;)	Line 1808 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
ADD_NEW_DATA(-(state_offset + count), 0, 0);	ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
}	}
}	}
break;	break;
Line 1719 for (;;)	Line 1820 for (;;)
case OP_PROP_EXTRA + OP_TYPEMINUPTO:	case OP_PROP_EXTRA + OP_TYPEMINUPTO:
case OP_PROP_EXTRA + OP_TYPEPOSUPTO:	case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 6, 0); }	{ ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
	const pcre_uint32 *cp;
const ucd_record * prop = GET_UCD(c);	const ucd_record * prop = GET_UCD(c);
switch(code[4])	switch(code[1 + IMM2_SIZE + 1])
{	{
case PT_ANY:	case PT_ANY:
OK = TRUE;	OK = TRUE;
Line 1737 for (;;)	Line 1839 for (;;)
break;	break;

case PT_GC:	case PT_GC:
OK = _pcre_ucp_gentype[prop->chartype] == code[5];	OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
break;	break;

case PT_PC:	case PT_PC:
OK = prop->chartype == code[5];	OK = prop->chartype == code[1 + IMM2_SIZE + 2];
break;	break;

case PT_SC:	case PT_SC:
OK = prop->script == code[5];	OK = prop->script == code[1 + IMM2_SIZE + 2];
break;	break;

/* These are specials for combination cases. */	/* These are specials for combination cases. */

case PT_ALNUM:	case PT_ALNUM:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N;	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
break;	break;

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;	c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||	c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
c == CHAR_FF || c == CHAR_CR;	c == CHAR_FF || c == CHAR_CR;
break;	break;

case PT_WORD:	case PT_WORD:
OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
_pcre_ucp_gentype[prop->chartype] == ucp_N ||	PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
c == CHAR_UNDERSCORE;	c == CHAR_UNDERSCORE;
break;	break;

	case PT_CLIST:
	cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
	for (;;)
	{
	if (c < *cp) { OK = FALSE; break; }
	if (c == *cp++) { OK = TRUE; break; }
	}
	break;

	case PT_UCNC:
	OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
	c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
	c >= 0xe000;
	break;

/* Should never occur, but keep compilers from grumbling. */	/* Should never occur, but keep compilers from grumbling. */

default:	default:
Line 1786 for (;;)	Line 1903 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + 6, 0); }	{ ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 1800 for (;;)	Line 1917 for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:	case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:	case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }	{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
const uschar *nptr = ptr + clen;	int lgb, rgb;
	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nd;	dlen = 1;
int ndlen = 1;	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
GETCHARLEN(nd, nptr, ndlen);	rgb = UCD_GRAPHBREAK(d);
if (UCD_CATEGORY(nd) != ucp_M) break;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += ndlen;	lgb = rgb;
	nptr += dlen;
}	}
if (++count >= GET2(code, 1))	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }	reset_could_continue = TRUE;
	if (++count >= (int)GET2(code, 1))
	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else	else
{ ADD_NEW_DATA(-state_offset, count, ncount); }	{ ADD_NEW_DATA(-state_offset, count, ncount); }
}	}
Line 1834 for (;;)	Line 1956 for (;;)
case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:	case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:	case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }	{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
int ncount = 0;	int ncount = 0;
switch (c)	switch (c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
goto ANYNL03;	goto ANYNL03;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;	if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
/* Fall through */	/* Fall through */

ANYNL03:	ANYNL03:
case 0x000a:	case CHAR_LF:
if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
{	{
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
else	else
{ ADD_NEW_DATA(-state_offset, count, ncount); }	{ ADD_NEW_DATA(-state_offset, count, ncount); }
break;	break;
Line 1878 for (;;)	Line 2002 for (;;)
case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:	case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:	case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }	{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
OK = TRUE;	OK = TRUE;
break;	break;

Line 1906 for (;;)	Line 2024 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else	else
{ ADD_NEW_DATA(-state_offset, count, 0); }	{ ADD_NEW_DATA(-state_offset, count, 0); }
}	}
Line 1920 for (;;)	Line 2038 for (;;)
case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:	case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:	case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)	if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }	{ ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
BOOL OK;	BOOL OK;
switch (c)	switch (c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
OK = TRUE;	OK = TRUE;
break;	break;

Line 1961 for (;;)	Line 2061 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW_DATA(-(state_offset + 4), 0, 0); }	{ ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
else	else
{ ADD_NEW_DATA(-state_offset, count, 0); }	{ ADD_NEW_DATA(-state_offset, count, 0); }
}	}
Line 1984 for (;;)	Line 2084 for (;;)
case OP_CHARI:	case OP_CHARI:
if (clen == 0) break;	if (clen == 0) break;

#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8)	if (utf)
{	{
if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else	if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
{	{
unsigned int othercase;	unsigned int othercase;
if (c < 128) othercase = fcc[c]; else	if (c < 128)
	othercase = fcc[c];
/* If we have Unicode property support, we can use it to test the	else
other case of the character. */	/* If we have Unicode property support, we can use it to test the
	other case of the character. */
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
othercase = UCD_OTHERCASE(c);	othercase = UCD_OTHERCASE(c);
#else	#else
othercase = NOTACHAR;	othercase = NOTACHAR;
#endif	#endif

if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }	if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
}	}
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
	/* Not UTF mode */
/* Non-UTF-8 mode */
{	{
if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }	if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
	{ ADD_NEW(state_offset + 2, 0); }
}	}
break;	break;

Line 2021 for (;;)	Line 2121 for (;;)
to wait for them to pass before continuing. */	to wait for them to pass before continuing. */

case OP_EXTUNI:	case OP_EXTUNI:
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)	if (clen > 0)
{	{
const uschar *nptr = ptr + clen;	int lgb, rgb;
	const pcre_uchar *nptr = ptr + clen;
int ncount = 0;	int ncount = 0;
	lgb = UCD_GRAPHBREAK(c);
while (nptr < end_subject)	while (nptr < end_subject)
{	{
int nclen = 1;	dlen = 1;
GETCHARLEN(c, nptr, nclen);	if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
if (UCD_CATEGORY(c) != ucp_M) break;	rgb = UCD_GRAPHBREAK(d);
	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
ncount++;	ncount++;
nptr += nclen;	lgb = rgb;
	nptr += dlen;
}	}
	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
	reset_could_continue = TRUE;
ADD_NEW_DATA(-(state_offset + 1), 0, ncount);	ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
}	}
break;	break;
Line 2046 for (;;)	Line 2152 for (;;)
case OP_ANYNL:	case OP_ANYNL:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000b:	case CHAR_VT:
case 0x000c:	case CHAR_FF:
case 0x0085:	case CHAR_NEL:
	#ifndef EBCDIC
case 0x2028:	case 0x2028:
case 0x2029:	case 0x2029:
	#endif /* Not EBCDIC */
if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;	if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;

case 0x000a:	case CHAR_LF:
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

case 0x000d:	case CHAR_CR:
if (ptr + 1 < end_subject && ptr[1] == 0x0a)	if (ptr + 1 >= end_subject)
{	{
	ADD_NEW(state_offset + 1, 0);
	if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
	reset_could_continue = TRUE;
	}
	else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
	{
ADD_NEW_DATA(-(state_offset + 1), 0, 1);	ADD_NEW_DATA(-(state_offset + 1), 0, 1);
}	}
else	else
Line 2074 for (;;)	Line 2188 for (;;)
case OP_NOT_VSPACE:	case OP_NOT_VSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
break;	break;

default:	default:
Line 2093 for (;;)	Line 2201 for (;;)
case OP_VSPACE:	case OP_VSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x000a:	VSPACE_CASES:
case 0x000b:
case 0x000c:
case 0x000d:
case 0x0085:
case 0x2028:
case 0x2029:
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

default: break;	default:
	break;
}	}
break;	break;

Line 2111 for (;;)	Line 2214 for (;;)
case OP_NOT_HSPACE:	case OP_NOT_HSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
break;	break;

default:	default:
Line 2142 for (;;)	Line 2227 for (;;)
case OP_HSPACE:	case OP_HSPACE:
if (clen > 0) switch(c)	if (clen > 0) switch(c)
{	{
case 0x09: /* HT */	HSPACE_CASES:
case 0x20: /* SPACE */
case 0xa0: /* NBSP */
case 0x1680: /* OGHAM SPACE MARK */
case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
case 0x2000: /* EN QUAD */
case 0x2001: /* EM QUAD */
case 0x2002: /* EN SPACE */
case 0x2003: /* EM SPACE */
case 0x2004: /* THREE-PER-EM SPACE */
case 0x2005: /* FOUR-PER-EM SPACE */
case 0x2006: /* SIX-PER-EM SPACE */
case 0x2007: /* FIGURE SPACE */
case 0x2008: /* PUNCTUATION SPACE */
case 0x2009: /* THIN SPACE */
case 0x200A: /* HAIR SPACE */
case 0x202f: /* NARROW NO-BREAK SPACE */
case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
case 0x3000: /* IDEOGRAPHIC SPACE */
ADD_NEW(state_offset + 1, 0);	ADD_NEW(state_offset + 1, 0);
break;	break;

	default:
	break;
}	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
/* Match a negated single character casefully. This is only used for	/* Match a negated single character casefully. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */

case OP_NOT:	case OP_NOT:
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }	if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
/* Match a negated single character caselessly. This is only used for	/* Match a negated single character caselessly. */
one-byte characters, that is, we know that d < 256. The character we are
checking (c) can be multibyte. */

case OP_NOTI:	case OP_NOTI:
if (clen > 0 && c != d && c != fcc[d])	if (clen > 0)
{ ADD_NEW(state_offset + dlen + 1, 0); }	{
	unsigned int otherd;
	#ifdef SUPPORT_UTF
	if (utf && d >= 128)
	{
	#ifdef SUPPORT_UCP
	otherd = UCD_OTHERCASE(d);
	#endif /* SUPPORT_UCP */
	}
	else
	#endif /* SUPPORT_UTF */
	otherd = TABLE_GET(d, fcc, d);
	if (c != d && c != otherd)
	{ ADD_NEW(state_offset + dlen + 1, 0); }
	}
break;	break;

/*-----------------------------------------------------------------*/	/*-----------------------------------------------------------------*/
Line 2206 for (;;)	Line 2286 for (;;)
if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }	if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && d >= 128)	if (utf && d >= 128)
{	{
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);	otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
otherd = fcc[d];	otherd = TABLE_GET(d, fcc, d);
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
Line 2253 for (;;)	Line 2333 for (;;)
ADD_ACTIVE(state_offset + dlen + 1, 0);	ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && d >= 128)	if (utf && d >= 128)
{	{
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);	otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
otherd = fcc[d];	otherd = TABLE_GET(d, fcc, d);
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
Line 2298 for (;;)	Line 2378 for (;;)
ADD_ACTIVE(state_offset + dlen + 1, 0);	ADD_ACTIVE(state_offset + dlen + 1, 0);
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && d >= 128)	if (utf && d >= 128)
{	{
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);	otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
otherd = fcc[d];	otherd = TABLE_GET(d, fcc, d);
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
Line 2335 for (;;)	Line 2415 for (;;)
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && d >= 128)	if (utf && d >= 128)
{	{
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);	otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
otherd = fcc[d];	otherd = TABLE_GET(d, fcc, d);
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 3, 0); }	{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 2375 for (;;)	Line 2455 for (;;)
case OP_NOTUPTO:	case OP_NOTUPTO:
case OP_NOTMINUPTO:	case OP_NOTMINUPTO:
case OP_NOTPOSUPTO:	case OP_NOTPOSUPTO:
ADD_ACTIVE(state_offset + dlen + 3, 0);	ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
count = current_state->count; /* Number already matched */	count = current_state->count; /* Number already matched */
if (clen > 0)	if (clen > 0)
{	{
unsigned int otherd = NOTACHAR;	pcre_uint32 otherd = NOTACHAR;
if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && d >= 128)	if (utf && d >= 128)
{	{
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
otherd = UCD_OTHERCASE(d);	otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
}	}
else	else
#endif /* SUPPORT_UTF8 */	#endif /* SUPPORT_UTF */
otherd = fcc[d];	otherd = TABLE_GET(d, fcc, d);
}	}
if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))	if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
{	{
Line 2400 for (;;)	Line 2480 for (;;)
active_count--; /* Remove non-match possibility */	active_count--; /* Remove non-match possibility */
next_active_state--;	next_active_state--;
}	}
if (++count >= GET2(code, 1))	if (++count >= (int)GET2(code, 1))
{ ADD_NEW(state_offset + dlen + 3, 0); }	{ ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 2418 for (;;)	Line 2498 for (;;)
{	{
BOOL isinclass = FALSE;	BOOL isinclass = FALSE;
int next_state_offset;	int next_state_offset;
const uschar *ecode;	const pcre_uchar *ecode;

/* For a simple class, there is always just a 32-byte table, and we	/* For a simple class, there is always just a 32-byte table, and we
can set isinclass from it. */	can set isinclass from it. */

if (codevalue != OP_XCLASS)	if (codevalue != OP_XCLASS)
{	{
ecode = code + 33;	ecode = code + 1 + (32 / sizeof(pcre_uchar));
if (clen > 0)	if (clen > 0)
{	{
isinclass = (c > 255)? (codevalue == OP_NCLASS) :	isinclass = (c > 255)? (codevalue == OP_NCLASS) :
((code[1 + c/8] & (1 << (c&7))) != 0);	((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
}	}
}	}

Line 2440 for (;;)	Line 2520 for (;;)
else	else
{	{
ecode = code + GET(code, 1);	ecode = code + GET(code, 1);
if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);	if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
}	}

/* At this point, isinclass is set for all kinds of class, and ecode	/* At this point, isinclass is set for all kinds of class, and ecode
Line 2473 for (;;)	Line 2553 for (;;)
case OP_CRRANGE:	case OP_CRRANGE:
case OP_CRMINRANGE:	case OP_CRMINRANGE:
count = current_state->count; /* Already matched */	count = current_state->count; /* Already matched */
if (count >= GET2(ecode, 1))	if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 5, 0); }	{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)	if (isinclass)
{	{
int max = GET2(ecode, 3);	int max = (int)GET2(ecode, 1 + IMM2_SIZE);
if (++count >= max && max != 0) /* Max 0 => no limit */	if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 5, 0); }	{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else	else
{ ADD_NEW(state_offset, count); }	{ ADD_NEW(state_offset, count); }
}	}
Line 2510 for (;;)	Line 2590 for (;;)
int rc;	int rc;
int local_offsets[2];	int local_offsets[2];
int local_workspace[1000];	int local_workspace[1000];
const uschar *endasscode = code + GET(code, 1);	const pcre_uchar *endasscode = code + GET(code, 1);

while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);	while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);

Line 2547 for (;;)	Line 2627 for (;;)
if (code[LINK_SIZE+1] == OP_CALLOUT)	if (code[LINK_SIZE+1] == OP_CALLOUT)
{	{
rrc = 0;	rrc = 0;
if (pcre_callout != NULL)	if (PUBL(callout) != NULL)
{	{
pcre_callout_block cb;	PUBL(callout_block) cb;
cb.version = 1; /* Version 1 of the callout block */	cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[LINK_SIZE+2];	cb.callout_number = code[LINK_SIZE+2];
cb.offset_vector = offsets;	cb.offset_vector = offsets;
	#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;	cb.subject = (PCRE_SPTR)start_subject;
	#elif defined COMPILE_PCRE16
	cb.subject = (PCRE_SPTR16)start_subject;
	#elif defined COMPILE_PCRE32
	cb.subject = (PCRE_SPTR32)start_subject;
	#endif
cb.subject_length = (int)(end_subject - start_subject);	cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);	cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);	cb.current_position = (int)(ptr - start_subject);
Line 2563 for (;;)	Line 2649 for (;;)
cb.capture_last = -1;	cb.capture_last = -1;
cb.callout_data = md->callout_data;	cb.callout_data = md->callout_data;
cb.mark = NULL; /* No (*MARK) support */	cb.mark = NULL; /* No (*MARK) support */
if ((rrc = (pcre_callout)(&cb)) < 0) return rrc; /* Abandon */	if ((rrc = (PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
}	}
if (rrc > 0) break; /* Fail this thread */	if (rrc > 0) break; /* Fail this thread */
code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */	code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
}	}

condcode = code[LINK_SIZE+1];	condcode = code[LINK_SIZE+1];
Line 2587 for (;;)	Line 2673 for (;;)

else if (condcode == OP_RREF || condcode == OP_NRREF)	else if (condcode == OP_RREF || condcode == OP_NRREF)
{	{
int value = GET2(code, LINK_SIZE+2);	int value = GET2(code, LINK_SIZE + 2);
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;	if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
if (md->recursive != NULL)	if (md->recursive != NULL)
{ ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }	{ ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }	else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
}	}

Line 2599 for (;;)	Line 2685 for (;;)
else	else
{	{
int rc;	int rc;
const uschar *asscode = code + LINK_SIZE + 1;	const pcre_uchar *asscode = code + LINK_SIZE + 1;
const uschar *endasscode = asscode + GET(asscode, 1);	const pcre_uchar *endasscode = asscode + GET(asscode, 1);

while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);	while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);

Line 2631 for (;;)	Line 2717 for (;;)
dfa_recursion_info *ri;	dfa_recursion_info *ri;
int local_offsets[1000];	int local_offsets[1000];
int local_workspace[1000];	int local_workspace[1000];
const uschar *callpat = start_code + GET(code, 1);	const pcre_uchar *callpat = start_code + GET(code, 1);
int recno = (callpat == md->start_code)? 0 :	int recno = (callpat == md->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);	GET2(callpat, 1 + LINK_SIZE);
int rc;	int rc;
Line 2682 for (;;)	Line 2768 for (;;)
{	{
for (rc = rc*2 - 2; rc >= 0; rc -= 2)	for (rc = rc*2 - 2; rc >= 0; rc -= 2)
{	{
const uschar *p = start_subject + local_offsets[rc];
const uschar *pp = start_subject + local_offsets[rc+1];
int charcount = local_offsets[rc+1] - local_offsets[rc];	int charcount = local_offsets[rc+1] - local_offsets[rc];
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
	if (utf)
	{
	const pcre_uchar *p = start_subject + local_offsets[rc];
	const pcre_uchar *pp = start_subject + local_offsets[rc+1];
	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
	}
	#endif
if (charcount > 0)	if (charcount > 0)
{	{
ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));	ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
Line 2708 for (;;)	Line 2799 for (;;)
case OP_BRAPOSZERO:	case OP_BRAPOSZERO:
{	{
int charcount, matched_count;	int charcount, matched_count;
const uschar *local_ptr = ptr;	const pcre_uchar *local_ptr = ptr;
BOOL allow_zero;	BOOL allow_zero;

if (codevalue == OP_BRAPOSZERO)	if (codevalue == OP_BRAPOSZERO)
Line 2758 for (;;)	Line 2849 for (;;)

if (matched_count > 0 \|\| allow_zero)	if (matched_count > 0 \|\| allow_zero)
{	{
const uschar *end_subpattern = code;	const pcre_uchar *end_subpattern = code;
int next_state_offset;	int next_state_offset;

do { end_subpattern += GET(end_subpattern, 1); }	do { end_subpattern += GET(end_subpattern, 1); }
Line 2779 for (;;)	Line 2870 for (;;)
}	}
else	else
{	{
const uschar *p = ptr;	const pcre_uchar *p = ptr;
const uschar *pp = local_ptr;	const pcre_uchar *pp = local_ptr;
charcount = (int)(pp - p);	charcount = (int)(pp - p);
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
	if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
	#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
}	}
}	}
Line 2809 for (;;)	Line 2902 for (;;)

if (rc >= 0)	if (rc >= 0)
{	{
const uschar *end_subpattern = code;	const pcre_uchar *end_subpattern = code;
int charcount = local_offsets[1] - local_offsets[0];	int charcount = local_offsets[1] - local_offsets[0];
int next_state_offset, repeat_state_offset;	int next_state_offset, repeat_state_offset;

Line 2862 for (;;)	Line 2955 for (;;)
}	}
else	else
{	{
const uschar *p = start_subject + local_offsets[0];	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
const uschar *pp = start_subject + local_offsets[1];	if (utf)
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;	{
	const pcre_uchar *p = start_subject + local_offsets[0];
	const pcre_uchar *pp = start_subject + local_offsets[1];
	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
	}
	#endif
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));	ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
if (repeat_state_offset >= 0)	if (repeat_state_offset >= 0)
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }	{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
Line 2880 for (;;)	Line 2978 for (;;)

case OP_CALLOUT:	case OP_CALLOUT:
rrc = 0;	rrc = 0;
if (pcre_callout != NULL)	if (PUBL(callout) != NULL)
{	{
pcre_callout_block cb;	PUBL(callout_block) cb;
cb.version = 1; /* Version 1 of the callout block */	cb.version = 1; /* Version 1 of the callout block */
cb.callout_number = code[1];	cb.callout_number = code[1];
cb.offset_vector = offsets;	cb.offset_vector = offsets;
	#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)start_subject;	cb.subject = (PCRE_SPTR)start_subject;
	#elif defined COMPILE_PCRE16
	cb.subject = (PCRE_SPTR16)start_subject;
	#elif defined COMPILE_PCRE32
	cb.subject = (PCRE_SPTR32)start_subject;
	#endif
cb.subject_length = (int)(end_subject - start_subject);	cb.subject_length = (int)(end_subject - start_subject);
cb.start_match = (int)(current_subject - start_subject);	cb.start_match = (int)(current_subject - start_subject);
cb.current_position = (int)(ptr - start_subject);	cb.current_position = (int)(ptr - start_subject);
Line 2896 for (;;)	Line 3000 for (;;)
cb.capture_last = -1;	cb.capture_last = -1;
cb.callout_data = md->callout_data;	cb.callout_data = md->callout_data;
cb.mark = NULL; /* No (*MARK) support */	cb.mark = NULL; /* No (*MARK) support */
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */	if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
}	}
if (rrc == 0)	if (rrc == 0)
{ ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }	{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
break;	break;


Line 2928 for (;;)	Line 3032 for (;;)
if (new_count <= 0)	if (new_count <= 0)
{	{
if (rlevel == 1 && /* Top level, and */	if (rlevel == 1 && /* Top level, and */
could_continue && /* Some could go on */	could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */	forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */	( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */	(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
Line 2936 for (;;)	Line 3040 for (;;)
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */	((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
match_count < 0) /* no matches */	match_count < 0) /* no matches */
) && /* And... */	) && /* And... */
ptr >= end_subject && /* Reached end of subject */	(
ptr > md->start_used_ptr) /* Inspected non-empty string */	partial_newline \|\| /* Either partial NL */
{	( /* or ... */
if (offsetcount >= 2)	ptr >= end_subject && /* End of subject and */
{	ptr > md->start_used_ptr) /* Inspected non-empty string */
offsets[0] = (int)(md->start_used_ptr - start_subject);	)
offsets[1] = (int)(end_subject - start_subject);	)
}
match_count = PCRE_ERROR_PARTIAL;	match_count = PCRE_ERROR_PARTIAL;
}

DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,	"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
rlevel*2-2, SP));	rlevel*2-2, SP));
Line 2996 Returns: > 0 => number of match offset pairs	Line 3097 Returns: > 0 => number of match offset pairs
< -1 => some kind of unexpected problem	< -1 => some kind of unexpected problem
*/	*/

	#if defined COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,	pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,	const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)	int offsetcount, int *workspace, int wscount)
	#elif defined COMPILE_PCRE16
	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
	pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
	PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
	int offsetcount, int *workspace, int wscount)
	#elif defined COMPILE_PCRE32
	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
	pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
	PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
	int offsetcount, int *workspace, int wscount)
	#endif
{	{
real_pcre *re = (real_pcre *)argument_re;	REAL_PCRE *re = (REAL_PCRE *)argument_re;
dfa_match_data match_block;	dfa_match_data match_block;
dfa_match_data *md = &match_block;	dfa_match_data *md = &match_block;
BOOL utf8, anchored, startline, firstline;	BOOL utf, anchored, startline, firstline;
const uschar *current_subject, *end_subject, *lcc;	const pcre_uchar *current_subject, *end_subject;

pcre_study_data internal_study;
const pcre_study_data *study = NULL;	const pcre_study_data *study = NULL;
real_pcre internal_re;

const uschar *req_byte_ptr;	const pcre_uchar *req_char_ptr;
const uschar *start_bits = NULL;	const pcre_uint8 *start_bits = NULL;
BOOL first_byte_caseless = FALSE;	BOOL has_first_char = FALSE;
BOOL req_byte_caseless = FALSE;	BOOL has_req_char = FALSE;
int first_byte = -1;	pcre_uchar first_char = 0;
int req_byte = -1;	pcre_uchar first_char2 = 0;
int req_byte2 = -1;	pcre_uchar req_char = 0;
	pcre_uchar req_char2 = 0;
int newline;	int newline;

/* Plausibility checks */	/* Plausibility checks */
Line 3027 if (re == NULL \|\| subject == NULL \|\| workspace == NULL	Line 3138 if (re == NULL \|\| subject == NULL \|\| workspace == NULL
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;	(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;	if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;	if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
	if (length < 0) return PCRE_ERROR_BADLENGTH;
if (start_offset < 0 \|\| start_offset > length) return PCRE_ERROR_BADOFFSET;	if (start_offset < 0 \|\| start_offset > length) return PCRE_ERROR_BADOFFSET;

/* We need to find the pointer to any study data before we test for byte	/* Check that the first field in the block is the magic number. If it is not,
flipping, so we scan the extra_data block first. This may set two fields in the	return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
match block, so we must initialize them beforehand. However, the other fields	REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
in the match block must not be set until after the byte flipping. */	means that the pattern is likely compiled with different endianness. */

	if (re->magic_number != MAGIC_NUMBER)
	return re->magic_number == REVERSED_MAGIC_NUMBER?
	PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
	if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;

	/* If restarting after a partial match, do some sanity checks on the contents
	of the workspace. */

	if ((options & PCRE_DFA_RESTART) != 0)
	{
	if ((workspace[0] & (-2)) != 0 \|\| workspace[1] < 1 \|\|
	workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
	return PCRE_ERROR_DFA_BADRESTART;
	}

	/* Set up study, callout, and table data */

md->tables = re->tables;	md->tables = re->tables;
md->callout_data = NULL;	md->callout_data = NULL;

Line 3051 if (extra_data != NULL)	Line 3180 if (extra_data != NULL)
md->tables = extra_data->tables;	md->tables = extra_data->tables;
}	}

/* Check that the first field in the block is the magic number. If it is not,
test for a regex that was compiled on a host of opposite endianness. If this is
the case, flipped values are put in internal_re and internal_study if there was
study data too. */

if (re->magic_number != MAGIC_NUMBER)
{
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
if (re == NULL) return PCRE_ERROR_BADMAGIC;
if (study != NULL) study = &internal_study;
}

/* Set some local values */	/* Set some local values */

current_subject = (const unsigned char *)subject + start_offset;	current_subject = (const pcre_uchar *)subject + start_offset;
end_subject = (const unsigned char *)subject + length;	end_subject = (const pcre_uchar *)subject + length;
req_byte_ptr = current_subject - 1;	req_char_ptr = current_subject - 1;

#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
utf8 = (re->options & PCRE_UTF8) != 0;	/* PCRE_UTF(16\|32) have the same value as PCRE_UTF8. */
	utf = (re->options & PCRE_UTF8) != 0;
#else	#else
utf8 = FALSE;	utf = FALSE;
#endif	#endif

anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)) != 0 \|\|	anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)) != 0 \|\|
Line 3080 anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)	Line 3198 anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)

/* The remaining fixed data for passing around. */	/* The remaining fixed data for passing around. */

md->start_code = (const uschar *)argument_re +	md->start_code = (const pcre_uchar *)argument_re +
re->name_table_offset + re->name_count * re->name_entry_size;	re->name_table_offset + re->name_count * re->name_entry_size;
md->start_subject = (const unsigned char *)subject;	md->start_subject = (const pcre_uchar *)subject;
md->end_subject = end_subject;	md->end_subject = end_subject;
md->start_offset = start_offset;	md->start_offset = start_offset;
md->moptions = options;	md->moptions = options;
Line 3143 else	Line 3261 else
/* Check a UTF-8 string if required. Unfortunately there's no way of passing	/* Check a UTF-8 string if required. Unfortunately there's no way of passing
back the character offset. */	back the character offset. */

#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)	if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
{	{
int erroroffset;	int erroroffset;
int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);	int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
if (errorcode != 0)	if (errorcode != 0)
{	{
if (offsetcount >= 2)	if (offsetcount >= 2)
Line 3155 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)	Line 3273 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
offsets[0] = erroroffset;	offsets[0] = erroroffset;
offsets[1] = errorcode;	offsets[1] = errorcode;
}	}
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?	#if defined COMPILE_PCRE8
	return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;	PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
	#elif defined COMPILE_PCRE16
	return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
	PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
	#elif defined COMPILE_PCRE32
	return PCRE_ERROR_BADUTF32;
	#endif
}	}
	#if defined COMPILE_PCRE8 \|\| defined COMPILE_PCRE16
if (start_offset > 0 && start_offset < length &&	if (start_offset > 0 && start_offset < length &&
(((USPTR)subject)[start_offset] & 0xc0) == 0x80)	NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
return PCRE_ERROR_BADUTF8_OFFSET;	return PCRE_ERROR_BADUTF8_OFFSET;
	#endif
}	}
#endif	#endif

Line 3168 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)	Line 3295 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
is a feature that makes it possible to save compiled regex and re-use them	is a feature that makes it possible to save compiled regex and re-use them
in other programs later. */	in other programs later. */

if (md->tables == NULL) md->tables = _pcre_default_tables;	if (md->tables == NULL) md->tables = PRIV(default_tables);

/* The lower casing table and the "must be at the start of a line" flag are	/* The "must be at the start of a line" flags are used in a loop when finding
used in a loop when finding where to start. */	where to start. */

lcc = md->tables + lcc_offset;
startline = (re->flags & PCRE_STARTLINE) != 0;	startline = (re->flags & PCRE_STARTLINE) != 0;
firstline = (re->options & PCRE_FIRSTLINE) != 0;	firstline = (re->options & PCRE_FIRSTLINE) != 0;

Line 3187 if (!anchored)	Line 3313 if (!anchored)
{	{
if ((re->flags & PCRE_FIRSTSET) != 0)	if ((re->flags & PCRE_FIRSTSET) != 0)
{	{
first_byte = re->first_byte & 255;	has_first_char = TRUE;
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)	first_char = first_char2 = (pcre_uchar)(re->first_char);
first_byte = lcc[first_byte];	if ((re->flags & PCRE_FCH_CASELESS) != 0)
	{
	first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
	if (utf && first_char > 127)
	first_char2 = UCD_OTHERCASE(first_char);
	#endif
	}
}	}
else	else
{	{
Line 3204 character" set. */	Line 3337 character" set. */

if ((re->flags & PCRE_REQCHSET) != 0)	if ((re->flags & PCRE_REQCHSET) != 0)
{	{
req_byte = re->req_byte & 255;	has_req_char = TRUE;
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;	req_char = req_char2 = (pcre_uchar)(re->req_char);
req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */	if ((re->flags & PCRE_RCH_CASELESS) != 0)
	{
	req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
	if (utf && req_char > 127)
	req_char2 = UCD_OTHERCASE(req_char);
	#endif
	}
}	}

/* Call the main matching function, looping for a non-anchored regex after a	/* Call the main matching function, looping for a non-anchored regex after a
Line 3219 for (;;)	Line 3359 for (;;)

if ((options & PCRE_DFA_RESTART) == 0)	if ((options & PCRE_DFA_RESTART) == 0)
{	{
const uschar *save_end_subject = end_subject;	const pcre_uchar *save_end_subject = end_subject;

/* If firstline is TRUE, the start of the match is constrained to the first	/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. Implement this by temporarily adjusting	line of a multiline string. Implement this by temporarily adjusting
Line 3228 for (;;)	Line 3368 for (;;)

if (firstline)	if (firstline)
{	{
USPTR t = current_subject;	PCRE_PUCHAR t = current_subject;
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8)	if (utf)
{	{
while (t < md->end_subject && !IS_NEWLINE(t))	while (t < md->end_subject && !IS_NEWLINE(t))
{	{
t++;	t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;	ACROSSCHAR(t < end_subject, *t, t++);
}	}
}	}
else	else
Line 3252 for (;;)	Line 3392 for (;;)

if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == 0)	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == 0)
{	{
/* Advance to a known first byte. */	/* Advance to a known first char. */

if (first_byte >= 0)	if (has_first_char)
{	{
if (first_byte_caseless)	if (first_char != first_char2)
	{
	pcre_uchar csc;
while (current_subject < end_subject &&	while (current_subject < end_subject &&
lcc[*current_subject] != first_byte)	(csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
current_subject++;	current_subject++;
	}
else	else
while (current_subject < end_subject &&	while (current_subject < end_subject &&
*current_subject != first_byte)	RAWUCHARTEST(current_subject) != first_char)
current_subject++;	current_subject++;
}	}

Line 3272 for (;;)	Line 3415 for (;;)
{	{
if (current_subject > md->start_subject + start_offset)	if (current_subject > md->start_subject + start_offset)
{	{
#ifdef SUPPORT_UTF8	#ifdef SUPPORT_UTF
if (utf8)	if (utf)
{	{
while (current_subject < end_subject &&	while (current_subject < end_subject &&
!WAS_NEWLINE(current_subject))	!WAS_NEWLINE(current_subject))
{	{
current_subject++;	current_subject++;
while(current_subject < end_subject &&	ACROSSCHAR(current_subject < end_subject, *current_subject,
(*current_subject & 0xc0) == 0x80)	current_subject++);
current_subject++;
}	}
}	}
else	else
Line 3293 for (;;)	Line 3435 for (;;)
ANYCRLF, and we are now at a LF, advance the match position by one	ANYCRLF, and we are now at a LF, advance the match position by one
more character. */	more character. */

if (current_subject[-1] == CHAR_CR &&	if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&	(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&
current_subject < end_subject &&	current_subject < end_subject &&
*current_subject == CHAR_NL)	RAWUCHARTEST(current_subject) == CHAR_NL)
current_subject++;	current_subject++;
}	}
}	}
Line 3307 for (;;)	Line 3449 for (;;)
{	{
while (current_subject < end_subject)	while (current_subject < end_subject)
{	{
register unsigned int c = *current_subject;	register pcre_uint32 c = RAWUCHARTEST(current_subject);
	#ifndef COMPILE_PCRE8
	if (c > 255) c = 255;
	#endif
if ((start_bits[c/8] & (1 << (c&7))) == 0)	if ((start_bits[c/8] & (1 << (c&7))) == 0)
{	{
current_subject++;	current_subject++;
#ifdef SUPPORT_UTF8	#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf8)	/* In non 8-bit mode, the iteration will stop for
while(current_subject < end_subject &&	characters > 255 at the beginning or not stop at all. */
(*current_subject & 0xc0) == 0x80) current_subject++;	if (utf)
	ACROSSCHAR(current_subject < end_subject, *current_subject,
	current_subject++);
#endif	#endif
}	}
else break;	else break;
Line 3342 for (;;)	Line 3489 for (;;)
(pcre_uint32)(end_subject - current_subject) < study->minlength)	(pcre_uint32)(end_subject - current_subject) < study->minlength)
return PCRE_ERROR_NOMATCH;	return PCRE_ERROR_NOMATCH;

/* If req_byte is set, we know that that character must appear in the	/* If req_char is set, we know that that character must appear in the
subject for the match to succeed. If the first character is set, req_byte	subject for the match to succeed. If the first character is set, req_char
must be later in the subject; otherwise the test starts at the match	must be later in the subject; otherwise the test starts at the match
point. This optimization can save a huge amount of work in patterns with	point. This optimization can save a huge amount of work in patterns with
nested unlimited repeats that aren't going to match. Writing separate	nested unlimited repeats that aren't going to match. Writing separate
Line 3355 for (;;)	Line 3502 for (;;)
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte	patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
string... so we don't do this when the string is sufficiently long. */	string... so we don't do this when the string is sufficiently long. */

if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)	if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
{	{
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);	register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);

/* We don't need to repeat the search if we haven't yet reached the	/* We don't need to repeat the search if we haven't yet reached the
place we found it at last time. */	place we found it at last time. */

if (p > req_byte_ptr)	if (p > req_char_ptr)
{	{
if (req_byte_caseless)	if (req_char != req_char2)
{	{
while (p < end_subject)	while (p < end_subject)
{	{
register int pp = *p++;	register pcre_uint32 pp = RAWUCHARINCTEST(p);
if (pp == req_byte \|\| pp == req_byte2) { p--; break; }	if (pp == req_char \|\| pp == req_char2) { p--; break; }
}	}
}	}
else	else
{	{
while (p < end_subject)	while (p < end_subject)
{	{
if (*p++ == req_byte) { p--; break; }	if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
}	}
}	}

Line 3389 for (;;)	Line 3536 for (;;)
found it, so that we don't search again next time round the loop if	found it, so that we don't search again next time round the loop if
the start hasn't passed this character yet. */	the start hasn't passed this character yet. */

req_byte_ptr = p;	req_char_ptr = p;
}	}
}	}
}	}
Line 3414 for (;;)	Line 3561 for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry	/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */	on only if not anchored. */

if (rc != PCRE_ERROR_NOMATCH \|\| anchored) return rc;	if (rc != PCRE_ERROR_NOMATCH \|\| anchored)
	{
	if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
	{
	offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
	offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
	if (offsetcount > 2)
	offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
	}
	return rc;
	}

/* Advance to the next subject character unless we are at the end of a line	/* Advance to the next subject character unless we are at the end of a line
and firstline is set. */	and firstline is set. */

if (firstline && IS_NEWLINE(current_subject)) break;	if (firstline && IS_NEWLINE(current_subject)) break;
current_subject++;	current_subject++;
if (utf8)	#ifdef SUPPORT_UTF
	if (utf)
{	{
while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)	ACROSSCHAR(current_subject < end_subject, *current_subject,
current_subject++;	current_subject++);
}	}
	#endif
if (current_subject > end_subject) break;	if (current_subject > end_subject) break;

/* If we have just passed a CR and we are now at a LF, and the pattern does	/* If we have just passed a CR and we are now at a LF, and the pattern does
not contain any explicit matches for \r or \n, and the newline option is CRLF	not contain any explicit matches for \r or \n, and the newline option is CRLF
or ANY or ANYCRLF, advance the match position by one more character. */	or ANY or ANYCRLF, advance the match position by one more character. */

if (current_subject[-1] == CHAR_CR &&	if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
current_subject < end_subject &&	current_subject < end_subject &&
*current_subject == CHAR_NL &&	RAWUCHARTEST(current_subject) == CHAR_NL &&
(re->flags & PCRE_HASCRORLF) == 0 &&	(re->flags & PCRE_HASCRORLF) == 0 &&
(md->nltype == NLTYPE_ANY \|\|	(md->nltype == NLTYPE_ANY \|\|
md->nltype == NLTYPE_ANYCRLF \|\|	md->nltype == NLTYPE_ANYCRLF \|\|

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1.1.1
changed lines
	Added in v.1.1.1.4