embedaddon/pcre/pcre_exec.c - diff

Return to pcre_exec.c CVS log

Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / pcre

Diff for /embedaddon/pcre/pcre_exec.c between versions 1.1.1.4 and 1.1.1.5

version 1.1.1.4, 2013/07/22 08:25:56	version 1.1.1.5, 2014/06/15 19:46:04
Line 107 because the offset vector is always a multiple of 3 lo	Line 107 because the offset vector is always a multiple of 3 lo

/* Min and max values for the common repeats; for the maxima, 0 => infinity */	/* Min and max values for the common repeats; for the maxima, 0 => infinity */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };	static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };	static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };

#ifdef PCRE_DEBUG	#ifdef PCRE_DEBUG
/*************************************************	/*************************************************
Line 167 match_ref(int offset, register PCRE_PUCHAR eptr, int l	Line 167 match_ref(int offset, register PCRE_PUCHAR eptr, int l
{	{
PCRE_PUCHAR eptr_start = eptr;	PCRE_PUCHAR eptr_start = eptr;
register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];	register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && defined SUPPORT_UCP
BOOL utf = md->utf;	BOOL utf = md->utf;
#endif	#endif

Line 195 ASCII characters. */	Line 195 ASCII characters. */

if (caseless)	if (caseless)
{	{
#ifdef SUPPORT_UTF	#if defined SUPPORT_UTF && defined SUPPORT_UCP
#ifdef SUPPORT_UCP
if (utf)	if (utf)
{	{
/* Match characters up to the end of the reference. NOTE: the number of	/* Match characters up to the end of the reference. NOTE: the number of
Line 230 if (caseless)	Line 229 if (caseless)
}	}
else	else
#endif	#endif
#endif

/* The same code works when not in UTF-8 mode and in UTF-8 mode when there	/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */	is no UCP support. */
Line 312 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8,	Line 310 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8,
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,	RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,	RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,	RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
RM61, RM62, RM63, RM64, RM65, RM66, RM67, RM68 };	RM61, RM62, RM63, RM64, RM65, RM66, RM67 };

/* These versions of the macros use the stack, as normal. There are debugging	/* These versions of the macros use the stack, as normal. There are debugging
versions and production versions. Note that the "rw" argument of RMATCH isn't	versions and production versions. Note that the "rw" argument of RMATCH isn't
Line 1173 for (;;)	Line 1171 for (;;)
ecode = md->start_code + code_offset;	ecode = md->start_code + code_offset;
save_capture_last = md->capture_last;	save_capture_last = md->capture_last;
matched_once = TRUE;	matched_once = TRUE;
	mstart = md->start_match_ptr; /* In case \K changed it */
continue;	continue;
}	}

Line 1245 for (;;)	Line 1244 for (;;)
eptr = md->end_match_ptr;	eptr = md->end_match_ptr;
ecode = md->start_code + code_offset;	ecode = md->start_code + code_offset;
matched_once = TRUE;	matched_once = TRUE;
	mstart = md->start_match_ptr; /* In case \K reset it */
continue;	continue;
}	}

Line 1274 for (;;)	Line 1274 for (;;)

/* Control never reaches here. */	/* Control never reaches here. */

/* Conditional group: compilation checked that there are no more than	/* Conditional group: compilation checked that there are no more than two
two branches. If the condition is false, skipping the first branch takes us	branches. If the condition is false, skipping the first branch takes us
past the end if there is only one branch, but that's OK because that is	past the end of the item if there is only one branch, but that's exactly
exactly what going to the ket would do. */	what we want. */

case OP_COND:	case OP_COND:
case OP_SCOND:	case OP_SCOND:
codelink = GET(ecode, 1);

	/* The variable codelink will be added to ecode when the condition is
	false, to get to the second branch. Setting it to the offset to the ALT
	or KET, then incrementing ecode achieves this effect. We now have ecode
	pointing to the condition or callout. */

	codelink = GET(ecode, 1); /* Offset to the second branch */
	ecode += 1 + LINK_SIZE; /* From this opcode */

/* Because of the way auto-callout works during compile, a callout item is	/* Because of the way auto-callout works during compile, a callout item is
inserted between OP_COND and an assertion condition. */	inserted between OP_COND and an assertion condition. */

if (ecode[LINK_SIZE+1] == OP_CALLOUT)	if (*ecode == OP_CALLOUT)
{	{
if (PUBL(callout) != NULL)	if (PUBL(callout) != NULL)
{	{
PUBL(callout_block) cb;	PUBL(callout_block) cb;
cb.version = 2; /* Version 1 of the callout block */	cb.version = 2; /* Version 1 of the callout block */
cb.callout_number = ecode[LINK_SIZE+2];	cb.callout_number = ecode[1];
cb.offset_vector = md->offset_vector;	cb.offset_vector = md->offset_vector;
#if defined COMPILE_PCRE8	#if defined COMPILE_PCRE8
cb.subject = (PCRE_SPTR)md->start_subject;	cb.subject = (PCRE_SPTR)md->start_subject;
Line 1304 for (;;)	Line 1311 for (;;)
cb.subject_length = (int)(md->end_subject - md->start_subject);	cb.subject_length = (int)(md->end_subject - md->start_subject);
cb.start_match = (int)(mstart - md->start_subject);	cb.start_match = (int)(mstart - md->start_subject);
cb.current_position = (int)(eptr - md->start_subject);	cb.current_position = (int)(eptr - md->start_subject);
cb.pattern_position = GET(ecode, LINK_SIZE + 3);	cb.pattern_position = GET(ecode, 2);
cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);	cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
cb.capture_top = offset_top/2;	cb.capture_top = offset_top/2;
cb.capture_last = md->capture_last & CAPLMASK;	cb.capture_last = md->capture_last & CAPLMASK;
/* Internal change requires this for API compatibility. */	/* Internal change requires this for API compatibility. */
Line 1315 for (;;)	Line 1322 for (;;)
if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);	if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);	if (rrc < 0) RRETURN(rrc);
}	}

	/* Advance ecode past the callout, so it now points to the condition. We
	must adjust codelink so that the value of ecode+codelink is unchanged. */

ecode += PRIV(OP_lengths)[OP_CALLOUT];	ecode += PRIV(OP_lengths)[OP_CALLOUT];
codelink -= PRIV(OP_lengths)[OP_CALLOUT];	codelink -= PRIV(OP_lengths)[OP_CALLOUT];
}	}

condcode = ecode[LINK_SIZE+1];	/* Test the various possible conditions */

/* Now see what the actual condition is */	condition = FALSE;
	switch(condcode = *ecode)
if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
{	{
if (md->recursive == NULL) /* Not recursing => FALSE */	case OP_RREF: /* Numbered group recursion test */
	if (md->recursive != NULL) /* Not recursing => FALSE */
{	{
condition = FALSE;	unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
ecode += GET(ecode, 1);
}
else
{
unsigned int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
condition = (recno == RREF_ANY || recno == md->recursive->group_num);	condition = (recno == RREF_ANY || recno == md->recursive->group_num);
	}
	break;

/* If the test is for recursion into a specific subpattern, and it is	case OP_DNRREF: /* Duplicate named group recursion test */
false, but the test was set up by name, scan the table to see if the	if (md->recursive != NULL)
name refers to any other numbers, and test them. The condition is true	{
if any one is set. */	int count = GET2(ecode, 1 + IMM2_SIZE);
	pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
if (!condition && condcode == OP_NRREF)	while (count-- > 0)
{	{
pcre_uchar *slotA = md->name_table;	unsigned int recno = GET2(slot, 0);
for (i = 0; i < md->name_count; i++)	condition = recno == md->recursive->group_num;
{	if (condition) break;
if (GET2(slotA, 0) == recno) break;	slot += md->name_entry_size;
slotA += md->name_entry_size;
}

/* Found a name for the number - there can be only one; duplicate
names for different numbers are allowed, but not vice versa. First
scan down for duplicates. */

if (i < md->name_count)
{
pcre_uchar *slotB = slotA;
while (slotB > md->name_table)
{
slotB -= md->name_entry_size;
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
{
condition = GET2(slotB, 0) == md->recursive->group_num;
if (condition) break;
}
else break;
}

/* Scan up for duplicates */

if (!condition)
{
slotB = slotA;
for (i++; i < md->name_count; i++)
{
slotB += md->name_entry_size;
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
{
condition = GET2(slotB, 0) == md->recursive->group_num;
if (condition) break;
}
else break;
}
}
}
}	}

/* Chose branch according to the condition */

ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);
}	}
}	break;

else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */	case OP_CREF: /* Numbered group used test */
{	offset = GET2(ecode, 1) << 1; /* Doubled ref number */
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
condition = offset < offset_top && md->offset_vector[offset] >= 0;	condition = offset < offset_top && md->offset_vector[offset] >= 0;
	break;

/* If the numbered capture is unset, but the reference was by name,	case OP_DNCREF: /* Duplicate named group used test */
scan the table to see if the name refers to any other numbers, and test
them. The condition is true if any one is set. This is tediously similar
to the code above, but not close enough to try to amalgamate. */

if (!condition && condcode == OP_NCREF)
{	{
unsigned int refno = offset >> 1;	int count = GET2(ecode, 1 + IMM2_SIZE);
pcre_uchar *slotA = md->name_table;	pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
	while (count-- > 0)
for (i = 0; i < md->name_count; i++)
{	{
if (GET2(slotA, 0) == refno) break;	offset = GET2(slot, 0) << 1;
slotA += md->name_entry_size;	condition = offset < offset_top && md->offset_vector[offset] >= 0;
	if (condition) break;
	slot += md->name_entry_size;
}	}

/* Found a name for the number - there can be only one; duplicate names
for different numbers are allowed, but not vice versa. First scan down
for duplicates. */

if (i < md->name_count)
{
pcre_uchar *slotB = slotA;
while (slotB > md->name_table)
{
slotB -= md->name_entry_size;
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
{
offset = GET2(slotB, 0) << 1;
condition = offset < offset_top &&
md->offset_vector[offset] >= 0;
if (condition) break;
}
else break;
}

/* Scan up for duplicates */

if (!condition)
{
slotB = slotA;
for (i++; i < md->name_count; i++)
{
slotB += md->name_entry_size;
if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
{
offset = GET2(slotB, 0) << 1;
condition = offset < offset_top &&
md->offset_vector[offset] >= 0;
if (condition) break;
}
else break;
}
}
}
}	}
	break;

/* Chose branch according to the condition */	case OP_DEF: /* DEFINE - always false */
	break;

ecode += condition? 1 + IMM2_SIZE : GET(ecode, 1);	/* The condition is an assertion. Call match() to evaluate it - setting
}	md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
	of an assertion. */

else if (condcode == OP_DEF) /* DEFINE - always false */	default:
{
condition = FALSE;
ecode += GET(ecode, 1);
}

/* The condition is an assertion. Call match() to evaluate it - setting
md->match_function_type to MATCH_CONDASSERT causes it to stop at the end of
an assertion. */

else
{
md->match_function_type = MATCH_CONDASSERT;	md->match_function_type = MATCH_CONDASSERT;
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3);	RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
if (rrc == MATCH_MATCH)	if (rrc == MATCH_MATCH)
{	{
if (md->end_offset_top > offset_top)	if (md->end_offset_top > offset_top)
offset_top = md->end_offset_top; /* Captures may have happened */	offset_top = md->end_offset_top; /* Captures may have happened */
condition = TRUE;	condition = TRUE;
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
	/* Advance ecode past the assertion to the start of the first branch,
	but adjust it so that the general choosing code below works. */

	ecode += GET(ecode, 1);
while (*ecode == OP_ALT) ecode += GET(ecode, 1);	while (*ecode == OP_ALT) ecode += GET(ecode, 1);
	ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
}	}

/* PCRE doesn't allow the effect of (*THEN) to escape beyond an	/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
assertion; it is therefore treated as NOMATCH. */	assertion; it is therefore treated as NOMATCH. Any other return is an
	error. */

else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)	else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
{	{
RRETURN(rrc); /* Need braces because of following else */	RRETURN(rrc); /* Need braces because of following else */
}	}
else	break;
{
condition = FALSE;
ecode += codelink;
}
}	}

/* We are now at the branch that is to be obeyed. As there is only one, can	/* Choose branch according to the condition */
use tail recursion to avoid using another stack frame, except when there is
unlimited repeat of a possibly empty group. In the latter case, a recursive
call to match() is always required, unless the second alternative doesn't
exist, in which case we can just plough on. Note that, for compatibility
with Perl, the | in a conditional group is NOT treated as creating two
alternatives. If a THEN is encountered in the branch, it propagates out to
the enclosing alternative (unless nested in a deeper set of alternatives,
of course). */

if (condition || *ecode == OP_ALT)	ecode += condition? PRIV(OP_lengths)[condcode] : codelink;

	/* We are now at the branch that is to be obeyed. As there is only one, we
	can use tail recursion to avoid using another stack frame, except when
	there is unlimited repeat of a possibly empty group. In the latter case, a
	recursive call to match() is always required, unless the second alternative
	doesn't exist, in which case we can just plough on. Note that, for
	compatibility with Perl, the | in a conditional group is NOT treated as
	creating two alternatives. If a THEN is encountered in the branch, it
	propagates out to the enclosing alternative (unless nested in a deeper set
	of alternatives, of course). */

	if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
{	{
if (op != OP_SCOND)	if (op != OP_SCOND)
{	{
ecode += 1 + LINK_SIZE;
goto TAIL_RECURSE;	goto TAIL_RECURSE;
}	}

md->match_function_type = MATCH_CBEGROUP;	md->match_function_type = MATCH_CBEGROUP;
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
RRETURN(rrc);	RRETURN(rrc);
}	}

Line 1523 for (;;)	Line 1442 for (;;)

else	else
{	{
ecode += 1 + LINK_SIZE;
}	}
break;	break;

Line 2089 for (;;)	Line 2007 for (;;)

if (*ecode == OP_KETRPOS)	if (*ecode == OP_KETRPOS)
{	{
	md->start_match_ptr = mstart; /* In case \K reset it */
md->end_match_ptr = eptr;	md->end_match_ptr = eptr;
md->end_offset_top = offset_top;	md->end_offset_top = offset_top;
RRETURN(MATCH_KETRPOS);	RRETURN(MATCH_KETRPOS);
Line 2656 for (;;)	Line 2575 for (;;)
RRETURN(MATCH_NOMATCH);	RRETURN(MATCH_NOMATCH);
break;	break;

case PT_SPACE: /* Perl space */	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||	which means that Perl space and POSIX space are now identical. PCRE
c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)	was changed at release 8.34. */
== (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;

	case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||	switch(c)
c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||	{
c == CHAR_FF \|\| c == CHAR_CR)	HSPACE_CASES:
== (op == OP_NOTPROP))	VSPACE_CASES:
RRETURN(MATCH_NOMATCH);	if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
	break;

	default:
	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
	(op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
	break;
	}
break;	break;

case PT_WORD:	case PT_WORD:
Line 2742 for (;;)	Line 2666 for (;;)
similar code to character type repeats - written out again for speed.	similar code to character type repeats - written out again for speed.
However, if the referenced string is the empty string, always treat	However, if the referenced string is the empty string, always treat
it as matched, any number of times (otherwise there could be infinite	it as matched, any number of times (otherwise there could be infinite
loops). */	loops). If the reference is unset, there are two possibilities:

case OP_REF:
case OP_REFI:
caseless = op == OP_REFI;
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
ecode += 1 + IMM2_SIZE;

/* If the reference is unset, there are two possibilities:

(a) In the default, Perl-compatible state, set the length negative;	(a) In the default, Perl-compatible state, set the length negative;
this ensures that every attempt at a match fails. We can't just fail	this ensures that every attempt at a match fails. We can't just fail
here, because of the possibility of quantifiers with zero minima.	here, because of the possibility of quantifiers with zero minima.
Line 2760 for (;;)	Line 2676 for (;;)
so that the back reference matches an empty string.	so that the back reference matches an empty string.

Otherwise, set the length to the length of what was matched by the	Otherwise, set the length to the length of what was matched by the
referenced subpattern. */	referenced subpattern.

	The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
	or to a non-duplicated named group. For a duplicated named group, OP_DNREF
	and OP_DNREFI are used. In this case we must scan the list of groups to
	which the name refers, and use the first one that is set. */

	case OP_DNREF:
	case OP_DNREFI:
	caseless = op == OP_DNREFI;
	{
	int count = GET2(ecode, 1+IMM2_SIZE);
	pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
	ecode += 1 + 2*IMM2_SIZE;

	while (count-- > 0)
	{
	offset = GET2(slot, 0) << 1;
	if (offset < offset_top && md->offset_vector[offset] >= 0) break;
	slot += md->name_entry_size;
	}
	if (count < 0)
	length = (md->jscript_compat)? 0 : -1;
	else
	length = md->offset_vector[offset+1] - md->offset_vector[offset];
	}
	goto REF_REPEAT;

	case OP_REF:
	case OP_REFI:
	caseless = op == OP_REFI;
	offset = GET2(ecode, 1) << 1; /* Doubled ref number */
	ecode += 1 + IMM2_SIZE;
if (offset >= offset_top || md->offset_vector[offset] < 0)	if (offset >= offset_top || md->offset_vector[offset] < 0)
length = (md->jscript_compat)? 0 : -1;	length = (md->jscript_compat)? 0 : -1;
else	else
Line 2769 for (;;)	Line 2716 for (;;)

/* Set up for repetition, or handle the non-repeated case */	/* Set up for repetition, or handle the non-repeated case */

	REF_REPEAT:
switch (*ecode)	switch (*ecode)
{	{
case OP_CRSTAR:	case OP_CRSTAR:
Line 2917 for (;;)	Line 2865 for (;;)
case OP_CRMINPLUS:	case OP_CRMINPLUS:
case OP_CRQUERY:	case OP_CRQUERY:
case OP_CRMINQUERY:	case OP_CRMINQUERY:
	case OP_CRPOSSTAR:
	case OP_CRPOSPLUS:
	case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;	c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;	if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
	else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */	min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */	max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;	if (max == 0) max = INT_MAX;
Line 2926 for (;;)	Line 2878 for (;;)

case OP_CRRANGE:	case OP_CRRANGE:
case OP_CRMINRANGE:	case OP_CRMINRANGE:
	case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);	minimize = (*ecode == OP_CRMINRANGE);
	possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);	min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);	max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;	if (max == 0) max = INT_MAX;
Line 3068 for (;;)	Line 3022 for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;	if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr += len;	eptr += len;
}	}

	if (possessive) continue; /* No backtracking */

for (;;)	for (;;)
{	{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
Line 3098 for (;;)	Line 3055 for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;	if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr++;	eptr++;
}	}

	if (possessive) continue; /* No backtracking */

while (eptr >= pp)	while (eptr >= pp)
{	{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
Line 3113 for (;;)	Line 3073 for (;;)
/* Control never gets here */	/* Control never gets here */


/* Match an extended character class. This opcode is encountered only	/* Match an extended character class. In the 8-bit library, this opcode is
when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8	encountered only when UTF-8 mode is supported. In the 16-bit and
mode, because Unicode properties are supported in non-UTF-8 mode. */	32-bit libraries, codepoints greater than 255 may be encountered even when
	UTF is not supported. */

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8	#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:	case OP_XCLASS:
Line 3131 for (;;)	Line 3092 for (;;)
case OP_CRMINPLUS:	case OP_CRMINPLUS:
case OP_CRQUERY:	case OP_CRQUERY:
case OP_CRMINQUERY:	case OP_CRMINQUERY:
	case OP_CRPOSSTAR:
	case OP_CRPOSPLUS:
	case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;	c = *ecode++ - OP_CRSTAR;
minimize = (c & 1) != 0;	if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
	else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */	min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */	max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;	if (max == 0) max = INT_MAX;
Line 3140 for (;;)	Line 3105 for (;;)

case OP_CRRANGE:	case OP_CRRANGE:
case OP_CRMINRANGE:	case OP_CRMINRANGE:
	case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);	minimize = (*ecode == OP_CRMINRANGE);
	possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);	min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);	max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;	if (max == 0) max = INT_MAX;
Line 3212 for (;;)	Line 3179 for (;;)
if (!PRIV(xclass)(c, data, utf)) break;	if (!PRIV(xclass)(c, data, utf)) break;
eptr += len;	eptr += len;
}	}

	if (possessive) continue; /* No backtracking */

for(;;)	for(;;)
{	{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
Line 3590 for (;;)	Line 3560 for (;;)
if (fc != cc && foc != cc) break;	if (fc != cc && foc != cc) break;
eptr++;	eptr++;
}	}

if (possessive) continue; /* No backtracking */	if (possessive) continue; /* No backtracking */
for (;;)	for (;;)
{	{
Line 3599 for (;;)	Line 3568 for (;;)
eptr--;	eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}	}
RRETURN(MATCH_NOMATCH);	/* Control never gets here */
}	}
/* Control never gets here */
}	}

/* Caseful comparisons (includes all multi-byte characters) */	/* Caseful comparisons (includes all multi-byte characters) */
Line 3657 for (;;)	Line 3625 for (;;)
eptr--;	eptr--;
if (rrc != MATCH_NOMATCH) RRETURN(rrc);	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
}	}
RRETURN(MATCH_NOMATCH);	/* Control never gets here */
}	}
}	}
/* Control never gets here */	/* Control never gets here */
Line 3942 for (;;)	Line 3910 for (;;)
eptr--;	eptr--;
}	}
}	}
	/* Control never gets here */
RRETURN(MATCH_NOMATCH);
}	}
/* Control never gets here */
}	}

/* Caseful comparisons */	/* Caseful comparisons */
Line 4079 for (;;)	Line 4045 for (;;)
eptr--;	eptr--;
}	}
}	}
	/* Control never gets here */
RRETURN(MATCH_NOMATCH);
}	}
}	}
/* Control never gets here */	/* Control never gets here */
Line 4262 for (;;)	Line 4227 for (;;)
}	}
break;	break;

	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
	which means that Perl space and POSIX space are now identical. PCRE
	was changed at release 8.34. */

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
	case PT_PXSPACE: /* POSIX space */
for (i = 1; i <= min; i++)	for (i = 1; i <= min; i++)
{	{
if (eptr >= md->end_subject)	if (eptr >= md->end_subject)
Line 4271 for (;;)	Line 4241 for (;;)
RRETURN(MATCH_NOMATCH);	RRETURN(MATCH_NOMATCH);
}	}
GETCHARINCTEST(c, eptr);	GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||	switch(c)
c == CHAR_FF || c == CHAR_CR)
== prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
break;

case PT_PXSPACE: /* POSIX space */
for (i = 1; i <= min; i++)
{
if (eptr >= md->end_subject)
{	{
SCHECK_PARTIAL();	HSPACE_CASES:
RRETURN(MATCH_NOMATCH);	VSPACE_CASES:
	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
	break;

	default:
	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
	RRETURN(MATCH_NOMATCH);
	break;
}	}
GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
== prop_fail_result)
RRETURN(MATCH_NOMATCH);
}	}
break;	break;

Line 5010 for (;;)	Line 4972 for (;;)
}	}
/* Control never gets here */	/* Control never gets here */

case PT_SPACE: /* Perl space */	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
for (fi = min;; fi++)	which means that Perl space and POSIX space are now identical. PCRE
{	was changed at release 8.34. */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_FF || c == CHAR_CR)
== prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */

	case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */	case PT_PXSPACE: /* POSIX space */
for (fi = min;; fi++)	for (fi = min;; fi++)
{	{
Line 5041 for (;;)	Line 4989 for (;;)
RRETURN(MATCH_NOMATCH);	RRETURN(MATCH_NOMATCH);
}	}
GETCHARINCTEST(c, eptr);	GETCHARINCTEST(c, eptr);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||	switch(c)
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)	{
== prop_fail_result)	HSPACE_CASES:
RRETURN(MATCH_NOMATCH);	VSPACE_CASES:
	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
	break;

	default:
	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
	RRETURN(MATCH_NOMATCH);
	break;
	}
}	}
/* Control never gets here */	/* Control never gets here */

Line 5097 for (;;)	Line 5053 for (;;)
case PT_UCNC:	case PT_UCNC:
for (fi = min;; fi++)	for (fi = min;; fi++)
{	{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);	if (fi >= max) RRETURN(MATCH_NOMATCH);
if (eptr >= md->end_subject)	if (eptr >= md->end_subject)
Line 5528 for (;;)	Line 5484 for (;;)
}	}
break;	break;

	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
	which means that Perl space and POSIX space are now identical. PCRE
	was changed at release 8.34. */

case PT_SPACE: /* Perl space */	case PT_SPACE: /* Perl space */
	case PT_PXSPACE: /* POSIX space */
for (i = min; i < max; i++)	for (i = min; i < max; i++)
{	{
int len = 1;	int len = 1;
Line 5538 for (;;)	Line 5499 for (;;)
break;	break;
}	}
GETCHARLENTEST(c, eptr, len);	GETCHARLENTEST(c, eptr, len);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||	switch(c)
c == CHAR_FF || c == CHAR_CR)	{
== prop_fail_result)	HSPACE_CASES:
	VSPACE_CASES:
	if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
break;	break;
eptr+= len;
}
break;

case PT_PXSPACE: /* POSIX space */	break;
for (i = min; i < max; i++)	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
{	goto ENDLOOP99; /* Break the loop */
int len = 1;
if (eptr >= md->end_subject)
{
SCHECK_PARTIAL();
break;	break;
}	}
GETCHARLENTEST(c, eptr, len);
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
== prop_fail_result)
break;
eptr+= len;	eptr+= len;
}	}
	ENDLOOP99:
break;	break;

case PT_WORD:	case PT_WORD:
Line 5642 for (;;)	Line 5594 for (;;)
}	}
}	}

/* Match extended Unicode sequences. We will get here only if the	/* Match extended Unicode grapheme clusters. We will get here only if the
support is in the binary; otherwise a compile-time error occurs. */	support is in the binary; otherwise a compile-time error occurs. */

else if (ctype == OP_EXTUNI)	else if (ctype == OP_EXTUNI)
Line 5675 for (;;)	Line 5627 for (;;)
/* eptr is now past the end of the maximum run */	/* eptr is now past the end of the maximum run */

if (possessive) continue; /* No backtracking */	if (possessive) continue; /* No backtracking */

for(;;)	for(;;)
{	{
if (eptr == pp) goto TAIL_RECURSE;	int lgb, rgb;
	PCRE_PUCHAR fptr;

	if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);	RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);	if (rrc != MATCH_NOMATCH) RRETURN(rrc);

	/* Backtracking over an extended grapheme cluster involves inspecting
	the previous two characters (if present) to see if a break is
	permitted between them. */

eptr--;	eptr--;
for (;;) /* Move back over one extended */	if (!utf) c = *eptr; else
{	{
if (!utf) c = *eptr; else	BACKCHAR(eptr);
	GETCHAR(c, eptr);
	}
	rgb = UCD_GRAPHBREAK(c);

	for (;;)
	{
	if (eptr == pp) goto TAIL_RECURSE; /* At start of char run */
	fptr = eptr - 1;
	if (!utf) c = *fptr; else
{	{
BACKCHAR(eptr);	BACKCHAR(fptr);
GETCHAR(c, eptr);	GETCHAR(c, fptr);
}	}
if (UCD_CATEGORY(c) != ucp_M) break;	lgb = UCD_GRAPHBREAK(c);
eptr--;	if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
	eptr = fptr;
	rgb = lgb;
}	}
}	}
}	}
Line 6211 for (;;)	Line 6183 for (;;)
}	}
}	}

/* Get here if we can't make it match with any permitted repetitions */	/* Control never gets here */

RRETURN(MATCH_NOMATCH);
}	}
/* Control never gets here */

/* There's been some horrible disaster. Arrival here can only mean there is	/* There's been some horrible disaster. Arrival here can only mean there is
something seriously wrong in the code above or the OP_xxx definitions. */	something seriously wrong in the code above or the OP_xxx definitions. */
Line 6249 switch (frame->Xwhere)	Line 6218 switch (frame->Xwhere)
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)	LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
LBL(65) LBL(66)	LBL(65) LBL(66)
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8	#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
LBL(21)	LBL(20) LBL(21)
#endif	#endif
#ifdef SUPPORT_UTF	#ifdef SUPPORT_UTF
LBL(16) LBL(18) LBL(20)	LBL(16) LBL(18)
LBL(22) LBL(23) LBL(28) LBL(30)	LBL(22) LBL(23) LBL(28) LBL(30)
LBL(32) LBL(34) LBL(42) LBL(46)	LBL(32) LBL(34) LBL(42) LBL(46)
#ifdef SUPPORT_UCP	#ifdef SUPPORT_UCP
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)	LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)	LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
#endif /* SUPPORT_UCP */	#endif /* SUPPORT_UCP */
#endif /* SUPPORT_UTF */	#endif /* SUPPORT_UTF */
default:	default:
Line 6410 const pcre_uint8 *start_bits = NULL;	Line 6379 const pcre_uint8 *start_bits = NULL;
PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;	PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
PCRE_PUCHAR end_subject;	PCRE_PUCHAR end_subject;
PCRE_PUCHAR start_partial = NULL;	PCRE_PUCHAR start_partial = NULL;
PCRE_PUCHAR match_partial;	PCRE_PUCHAR match_partial = NULL;
PCRE_PUCHAR req_char_ptr = start_match - 1;	PCRE_PUCHAR req_char_ptr = start_match - 1;

const pcre_study_data *study;	const pcre_study_data *study;
Line 7178 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)	Line 7147 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)

/* Handle partial matches - disable any mark data */	/* Handle partial matches - disable any mark data */

if (start_partial != NULL)	if (match_partial != NULL)
{	{
DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));	DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
md->mark = NULL;	md->mark = NULL;

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1.1.4
changed lines
	Added in v.1.1.1.5